[llvm] [RISCV][TTI] Implement instruction cost for vp.reduce.* (PR #114184)
Elvis Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 21:39:02 PST 2024
https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/114184
>From f1055d057ebfd2396c84d445dd7305c9562fa7e5 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 29 Oct 2024 18:49:05 -0700
Subject: [PATCH 1/7] Precommit testcases.
---
.../Analysis/CostModel/RISCV/reduce-add.ll | 141 +++++--
.../Analysis/CostModel/RISCV/reduce-and.ll | 147 +++++--
.../Analysis/CostModel/RISCV/reduce-fadd.ll | 385 +++++++++++++++---
.../CostModel/RISCV/reduce-fmaximum.ll | 182 ++++++++-
.../CostModel/RISCV/reduce-fminimum.ll | 98 ++++-
.../Analysis/CostModel/RISCV/reduce-fmul.ll | 355 +++++++++++++++-
.../Analysis/CostModel/RISCV/reduce-max.ll | 275 ++++++++++---
.../Analysis/CostModel/RISCV/reduce-min.ll | 272 ++++++++++---
.../Analysis/CostModel/RISCV/reduce-or.ll | 150 +++++--
.../CostModel/RISCV/reduce-scalable-fp.ll | 211 +++++++++-
.../CostModel/RISCV/reduce-scalable-int.ll | 352 +++++++++++++++-
.../Analysis/CostModel/RISCV/reduce-xor.ll | 141 +++++--
12 files changed, 2421 insertions(+), 288 deletions(-)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 6032ae01aa718b..1edcdecb923e57 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
define i32 @reduce_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_i1(i32 %arg) {
%V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
%V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
%V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
+
+ %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -48,6 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_i8(i32 %arg) {
%V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
%V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
%V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+
+ %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -82,6 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
@@ -93,6 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
@@ -103,6 +171,15 @@ define i32 @reduce_i16(i32 %arg) {
%V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
%V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
%V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
+
+ %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -116,6 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i32'
@@ -127,6 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
@@ -137,32 +230,19 @@ define i32 @reduce_i32(i32 %arg) {
%V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
%V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
%V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
+
+ %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
%V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
@@ -171,6 +251,15 @@ define i32 @reduce_i64(i32 %arg) {
%V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
%V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
%V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+
+ %V1_vp = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index a8eb4e9a280dd0..f72298b8060631 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
define i32 @reduce_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_i1'
@@ -17,6 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
@@ -31,6 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
@@ -44,6 +68,18 @@ define i32 @reduce_i1(i32 %arg) {
%V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
%V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
%V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
+
+ %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+ %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+ %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+ %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
ret i32 undef
}
@@ -57,6 +93,13 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
@@ -68,6 +111,13 @@ define i32 @reduce_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -78,6 +128,14 @@ define i32 @reduce_i8(i32 %arg) {
%V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
%V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
%V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
+
+ %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
ret i32 undef
}
@@ -91,6 +149,14 @@ define i32 @reduce_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
@@ -102,6 +168,14 @@ define i32 @reduce_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
@@ -112,6 +186,15 @@ define i32 @reduce_i16(i32 %arg) {
%V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
%V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
%V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
+
+ %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -125,6 +208,14 @@ define i32 @reduce_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i32'
@@ -136,6 +227,14 @@ define i32 @reduce_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
@@ -146,32 +245,19 @@ define i32 @reduce_i32(i32 %arg) {
%V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
%V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
%V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
+
+ %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
%V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
@@ -180,6 +266,15 @@ define i32 @reduce_i64(i32 %arg) {
%V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
%V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
%V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+
+ %V1_vp = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index 1762f701a9b2d5..f1636785b90b81 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefix=SIZE
define void @reduce_fadd_bfloat() {
; FP-REDUCE-LABEL: 'reduce_fadd_bfloat'
@@ -19,6 +19,20 @@ define void @reduce_fadd_bfloat() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fadd_bfloat'
@@ -36,6 +50,20 @@ define void @reduce_fadd_bfloat() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -52,6 +80,21 @@ define void @reduce_fadd_bfloat() {
%NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
%NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
%NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
+
+ %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -71,6 +114,20 @@ define void @reduce_fadd_half() {
; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half'
@@ -88,6 +145,20 @@ define void @reduce_fadd_half() {
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fadd_half'
@@ -105,6 +176,20 @@ define void @reduce_fadd_half() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -121,6 +206,21 @@ define void @reduce_fadd_half() {
%NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef)
%NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef)
%NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef)
+
+ %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0.0, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0.0, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0.0, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -139,6 +239,20 @@ define void @reduce_fadd_float() {
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fadd_float'
@@ -155,6 +269,20 @@ define void @reduce_fadd_float() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -170,6 +298,21 @@ define void @reduce_fadd_float() {
%NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef)
%NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef)
%NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef)
+
+ %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.0, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.0, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -187,6 +330,20 @@ define void @reduce_fadd_double() {
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fadd_double'
@@ -202,6 +359,20 @@ define void @reduce_fadd_double() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -216,42 +387,27 @@ define void @reduce_fadd_double() {
%NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef)
%NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef)
%NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef)
+
+ %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.0, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.0, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.0, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
define void @reduce_ordered_fadd_bfloat() {
; FP-REDUCE-LABEL: 'reduce_ordered_fadd_bfloat'
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; SIZE-LABEL: 'reduce_ordered_fadd_bfloat'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -268,43 +424,25 @@ define void @reduce_ordered_fadd_bfloat() {
%NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
%NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
%NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
+
+ %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
define void @reduce_ordered_fadd_half() {
-; FP-REDUCE-ZVFH-LABEL: 'reduce_ordered_fadd_half'
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_ordered_fadd_half'
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_ordered_fadd_half'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
@@ -321,6 +459,20 @@ define void @reduce_ordered_fadd_half() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -337,6 +489,21 @@ define void @reduce_ordered_fadd_half() {
%NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef)
%NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef)
%NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef)
+
+ %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0.0, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0.0, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0.0, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -355,6 +522,20 @@ define void @reduce_ordered_fadd_float() {
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_ordered_fadd_float'
@@ -371,6 +552,20 @@ define void @reduce_ordered_fadd_float() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -386,6 +581,21 @@ define void @reduce_ordered_fadd_float() {
%NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef)
%NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef)
%NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef)
+
+ %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.0, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.0, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -403,6 +613,20 @@ define void @reduce_ordered_fadd_double() {
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_ordered_fadd_double'
@@ -418,6 +642,20 @@ define void @reduce_ordered_fadd_double() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -432,5 +670,20 @@ define void @reduce_ordered_fadd_double() {
%NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef)
%NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef)
%NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef)
+
+ %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.0, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.0, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.0, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index b14c60012077de..dedeb4be67ae82 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
define float @reduce_fmaximum_f32(float %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f32'
@@ -20,6 +22,34 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -37,6 +67,34 @@ define float @reduce_fmaximum_f32(float %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -53,6 +111,36 @@ call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef)
call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
+
+%V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+%V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+%V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+%V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+%V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+%V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+%V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+
+call float @llvm.vp.reduce.fmax.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmax.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call fast float @llvm.vp.reduce.fmax.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
ret float undef
}
declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
@@ -77,6 +165,34 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -92,6 +208,34 @@ define double @reduce_fmaximum_f64(double %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
@@ -106,6 +250,36 @@ call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef)
call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
+
+call double @llvm.vp.reduce.fmaximum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmaximum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+
+call double @llvm.vp.reduce.fmax.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmax.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call fast double @llvm.vp.reduce.fmax.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
ret double undef
}
declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 2172a85bc46aaf..6d51911b4fc408 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
define float @reduce_fmaximum_f32(float %arg) {
; CHECK-LABEL: 'reduce_fmaximum_f32'
@@ -13,6 +15,20 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -23,6 +39,20 @@ define float @reduce_fmaximum_f32(float %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -32,6 +62,22 @@ define float @reduce_fmaximum_f32(float %arg) {
%V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
%V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
%V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
+
+%V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+%V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+%V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+%V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+%V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+%V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+%V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+
+call float @llvm.vp.reduce.fmin.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+call float @llvm.vp.reduce.fmin.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
ret float undef
}
declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
@@ -50,6 +96,20 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -59,6 +119,20 @@ define double @reduce_fmaximum_f64(double %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
@@ -67,6 +141,22 @@ define double @reduce_fmaximum_f64(double %arg) {
%V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
%V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
%V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
+
+%V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+%V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+%V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+%V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+%V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+%V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+%V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+
+call double @llvm.vp.reduce.fmin.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+call double @llvm.vp.reduce.fmin.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
ret double undef
}
declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
index 211bcb1343eea4..dc43a54ff1855b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefix=SIZE
define void @reduce_fmul_bfloat() {
; FP-REDUCE-LABEL: 'reduce_fmul_bfloat'
@@ -19,6 +19,20 @@ define void @reduce_fmul_bfloat() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fmul_bfloat'
@@ -36,6 +50,20 @@ define void @reduce_fmul_bfloat() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -52,6 +80,21 @@ define void @reduce_fmul_bfloat() {
%NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
%NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
%NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
+
+ %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -71,6 +114,20 @@ define void @reduce_fmul_half() {
; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half'
@@ -88,6 +145,20 @@ define void @reduce_fmul_half() {
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fmul_half'
@@ -105,6 +176,20 @@ define void @reduce_fmul_half() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
@@ -121,6 +206,21 @@ define void @reduce_fmul_half() {
%NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
%NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
%NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
+
+ %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -139,6 +239,20 @@ define void @reduce_fmul_float() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fmul.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fmul_float'
@@ -155,6 +269,20 @@ define void @reduce_fmul_float() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fmul.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
@@ -170,6 +298,21 @@ define void @reduce_fmul_float() {
%NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
%NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
%NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
+
+ %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call fast float @llvm.vp.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call fast float @llvm.vp.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call fast float @llvm.vp.reduce.fmul.nxv32f32(float 0.0, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -187,6 +330,20 @@ define void @reduce_fmul_double() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fmul.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fmul.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fmul_double'
@@ -202,6 +359,20 @@ define void @reduce_fmul_double() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fmul.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fmul.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
@@ -216,6 +387,21 @@ define void @reduce_fmul_double() {
%NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
%NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
%NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
+
+ %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call fast double @llvm.vp.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call fast double @llvm.vp.reduce.fmul.nxv16f64(double 0.0, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call fast double @llvm.vp.reduce.fmul.nxv32f64(double 0.0, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -235,6 +421,20 @@ define void @reduce_ordered_fmul_bfloat() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_ordered_fmul_bfloat'
@@ -252,6 +452,20 @@ define void @reduce_ordered_fmul_bfloat() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -268,6 +482,21 @@ define void @reduce_ordered_fmul_bfloat() {
%NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
%NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
%NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
+
+ %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -287,6 +516,20 @@ define void @reduce_ordered_fmul_half() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_ordered_fmul_half'
@@ -304,6 +547,20 @@ define void @reduce_ordered_fmul_half() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
@@ -320,6 +577,21 @@ define void @reduce_ordered_fmul_half() {
%NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
%NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
%NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
+
+ %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call half @llvm.vp.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call half @llvm.vp.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+ %NXV32_vp = call half @llvm.vp.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
ret void
}
@@ -338,6 +610,19 @@ define void @reduce_ordered_fmul_float() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_ordered_fmul_float'
@@ -354,6 +639,19 @@ define void @reduce_ordered_fmul_float() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
@@ -369,6 +667,20 @@ define void @reduce_ordered_fmul_float() {
%NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
%NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
%NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
+
+ %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call float @llvm.vp.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+ %NXV16_vp = call float @llvm.vp.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
ret void
}
@@ -386,6 +698,18 @@ define void @reduce_ordered_fmul_double() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_ordered_fmul_double'
@@ -401,6 +725,18 @@ define void @reduce_ordered_fmul_double() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
@@ -415,5 +751,18 @@ define void @reduce_ordered_fmul_double() {
%NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
%NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
%NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
+
+ %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef)
+ %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef)
+ %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+ %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+ %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+ %NXV8_vp = call double @llvm.vp.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
ret void
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index c5d677e574c13c..c037eb5bfc17f4 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
define i32 @reduce_umin_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_umin_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_umin_i1(i32 %arg) {
%V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
%V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
%V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
+
+ %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -48,6 +75,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umax_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_umax_i8(i32 %arg) {
%V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
%V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
%V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+
+ %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -81,7 +133,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umax_i16'
@@ -92,7 +151,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
@@ -102,7 +168,15 @@ define i32 @reduce_umax_i16(i32 %arg) {
%V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
%V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
%V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
- %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
+
+ %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -116,6 +190,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umax_i32'
@@ -127,6 +209,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
@@ -137,32 +227,19 @@ define i32 @reduce_umax_i32(i32 %arg) {
%V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
%V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
%V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
+
+ %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
define i32 @reduce_umax_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_umax_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_umax_i64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
%V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
@@ -171,6 +248,15 @@ define i32 @reduce_umax_i64(i32 %arg) {
%V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
%V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
%V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+
+ %V1_vp = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -184,6 +270,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i1'
@@ -195,6 +289,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
@@ -205,6 +307,15 @@ define i32 @reduce_smin_i1(i32 %arg) {
%V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
%V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
%V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
+
+ %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -218,6 +329,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smax_i8'
@@ -229,6 +348,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
@@ -239,6 +366,15 @@ define i32 @reduce_smax_i8(i32 %arg) {
%V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
%V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
%V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+
+ %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -252,6 +388,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smax_i16'
@@ -263,6 +407,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
@@ -273,6 +425,15 @@ define i32 @reduce_smax_i16(i32 %arg) {
%V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
%V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
%V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
+
+ %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -286,6 +447,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smax_i32'
@@ -297,6 +466,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
@@ -307,32 +484,19 @@ define i32 @reduce_smax_i32(i32 %arg) {
%V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
%V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
%V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
+
+ %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
define i32 @reduce_smax_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_smax_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_smax_i64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
%V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
@@ -341,6 +505,15 @@ define i32 @reduce_smax_i64(i32 %arg) {
%V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
%V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
%V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+
+ %V1_vp = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 89bff381384156..858c053f0c3ef3 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
define i32 @reduce_umin_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_umin_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_umin_i1(i32 %arg) {
%V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
%V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
%V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
+
+ %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -48,6 +75,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_umin_i8(i32 %arg) {
%V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
%V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
%V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+
+ %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -82,6 +134,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i16'
@@ -93,6 +153,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
@@ -103,6 +171,15 @@ define i32 @reduce_umin_i16(i32 %arg) {
%V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
%V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
%V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
+
+ %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -116,6 +193,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i32'
@@ -127,6 +212,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
@@ -137,32 +230,19 @@ define i32 @reduce_umin_i32(i32 %arg) {
%V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
%V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
%V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
+
+ %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
define i32 @reduce_umin_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_umin_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_umin_i64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
%V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
@@ -171,6 +251,15 @@ define i32 @reduce_umin_i64(i32 %arg) {
%V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
%V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
%V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+
+ %V1_vp = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -184,6 +273,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i1'
@@ -195,6 +292,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> undef)
@@ -205,6 +310,15 @@ define i32 @reduce_smin_i1(i32 %arg) {
%V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
%V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
%V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
+
+ %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -218,6 +332,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i8'
@@ -229,6 +351,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
@@ -239,6 +369,15 @@ define i32 @reduce_smin_i8(i32 %arg) {
%V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
%V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
%V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+
+ %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -252,6 +391,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i16'
@@ -263,6 +410,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
@@ -273,6 +428,15 @@ define i32 @reduce_smin_i16(i32 %arg) {
%V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
%V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
%V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
+
+ %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -286,6 +450,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i32'
@@ -297,6 +469,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
@@ -307,32 +487,19 @@ define i32 @reduce_smin_i32(i32 %arg) {
%V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
%V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
%V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
+
+ %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
define i32 @reduce_smin_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_smin_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_smin_i64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
%V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
@@ -341,6 +508,15 @@ define i32 @reduce_smin_i64(i32 %arg) {
%V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
%V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
%V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+
+ %V1_vp = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index d75a95f3fadd97..2db77240fc3fcf 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
define i32 @reduce_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_i1'
@@ -17,6 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
@@ -31,6 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
@@ -44,6 +68,18 @@ define i32 @reduce_i1(i32 %arg) {
%V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
%V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
%V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
+
+ %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+ %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+ %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+ %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
ret i32 undef
}
@@ -57,6 +93,14 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
@@ -68,6 +112,14 @@ define i32 @reduce_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
@@ -78,6 +130,15 @@ define i32 @reduce_i8(i32 %arg) {
%V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
%V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
%V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
+
+ %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -91,6 +152,14 @@ define i32 @reduce_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
@@ -102,6 +171,14 @@ define i32 @reduce_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
@@ -112,6 +189,15 @@ define i32 @reduce_i16(i32 %arg) {
%V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
%V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
%V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
+
+ %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -125,6 +211,14 @@ define i32 @reduce_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i32'
@@ -136,6 +230,14 @@ define i32 @reduce_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
@@ -146,32 +248,19 @@ define i32 @reduce_i32(i32 %arg) {
%V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
%V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
%V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
+
+ %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
%V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
@@ -180,6 +269,15 @@ define i32 @reduce_i64(i32 %arg) {
%V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
%V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
%V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+
+ %V1_vp = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 4f6e0ba074ed81..7f1ff31f594348 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -1,34 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv1f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv1f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+ %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
ret half %red
}
define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
ret half %red
}
@@ -37,26 +45,32 @@ declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv2f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv2f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+ %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
ret half %red
}
define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
ret half %red
}
@@ -65,26 +79,32 @@ declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv4f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv4f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+ %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
ret half %red
}
define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
ret half %red
}
@@ -93,26 +113,32 @@ declare float @llvm.vector.reduce.fadd.nxv1f32(float, <vscale x 1 x float>)
define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+ %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
@@ -120,15 +146,18 @@ define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
%red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+ %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
@@ -136,15 +165,18 @@ define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
%red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
+ %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
@@ -153,26 +185,32 @@ declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+ %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
ret float %red
}
define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
ret float %red
}
@@ -180,15 +218,18 @@ define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
%red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+ %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
ret float %red
}
@@ -196,15 +237,18 @@ define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
%red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
+ %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
ret float %red
}
@@ -213,26 +257,32 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+ %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
ret float %red
}
define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
ret float %red
}
@@ -240,15 +290,18 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
%red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+ %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
ret float %red
}
@@ -256,15 +309,18 @@ define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
%red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
+ %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
ret float %red
}
@@ -273,26 +329,32 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>)
define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+ %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
ret double %red
}
define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
ret double %red
}
@@ -300,15 +362,18 @@ define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
%red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+ %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
ret double %red
}
@@ -316,15 +381,18 @@ define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
%red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
+ %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
ret double %red
}
@@ -333,26 +401,32 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+ %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
ret double %red
}
define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
ret double %red
}
@@ -360,15 +434,18 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
%red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+ %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
ret double %red
}
@@ -376,15 +453,18 @@ define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
%red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
+ %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
ret double %red
}
@@ -393,26 +473,32 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+ %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
ret double %red
}
define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
ret double %red
}
@@ -420,15 +506,18 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
%red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+ %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
ret double %red
}
@@ -436,15 +525,18 @@ define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
%red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
+ %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
ret double %red
}
@@ -453,39 +545,48 @@ declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)
define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
ret half %red
}
define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+ %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
ret half %red
}
define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+ %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
ret half %red
}
@@ -494,13 +595,16 @@ declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv2f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv2f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
ret half %red
}
@@ -509,13 +613,16 @@ declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv4f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv4f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
ret half %red
}
@@ -524,13 +631,16 @@ declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv64f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv64f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
ret half %red
}
@@ -539,39 +649,48 @@ declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)
define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+ %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+ %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
@@ -580,13 +699,16 @@ declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
ret float %red
}
@@ -595,13 +717,16 @@ declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
ret float %red
}
@@ -610,13 +735,16 @@ declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv32f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv32f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
ret float %red
}
@@ -625,39 +753,48 @@ declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)
define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
ret double %red
}
define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+ %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
ret double %red
}
define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+ %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
ret double %red
}
@@ -666,13 +803,16 @@ declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)
define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
ret double %red
}
@@ -681,13 +821,16 @@ declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
ret double %red
}
@@ -696,13 +839,16 @@ declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv16f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv16f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
ret double %red
}
@@ -711,39 +857,48 @@ declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)
define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
ret half %red
}
define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+ %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
ret half %red
}
define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+ %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
ret half %red
}
@@ -752,13 +907,16 @@ declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv2f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv2f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
ret half %red
}
@@ -767,13 +925,16 @@ declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv4f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv4f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
ret half %red
}
@@ -782,13 +943,16 @@ declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv64f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv64f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+ %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
ret half %red
}
@@ -797,39 +961,48 @@ declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)
define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+ %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+ %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
@@ -838,13 +1011,16 @@ declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
ret float %red
}
@@ -853,13 +1029,16 @@ declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
ret float %red
}
@@ -868,13 +1047,16 @@ declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv32f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv32f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+ %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
ret float %red
}
@@ -883,39 +1065,48 @@ declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
ret double %red
}
define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+ %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
ret double %red
}
define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+ %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
ret double %red
}
@@ -924,13 +1115,16 @@ declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)
define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
ret double %red
}
@@ -939,13 +1133,16 @@ declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
ret double %red
}
@@ -954,25 +1151,31 @@ declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv16f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv16f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+ %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
ret double %red
}
define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_nsz_fadd_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_nsz_fadd_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
+ %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
ret float %red
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index 2807f7526760f8..b565cc9ac3af4a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -1,21 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_add_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_add_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
ret i8 %red
}
@@ -24,13 +29,16 @@ declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
ret i8 %red
}
@@ -39,13 +47,16 @@ declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
ret i8 %red
}
@@ -54,13 +65,16 @@ declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
ret i8 %red
}
@@ -69,13 +83,16 @@ declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
ret i8 %red
}
@@ -84,13 +101,16 @@ declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_and_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_and_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
ret i8 %red
}
@@ -99,13 +119,16 @@ declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_or_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_or_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
ret i8 %red
}
@@ -114,13 +137,16 @@ declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
ret i8 %red
}
@@ -129,13 +155,16 @@ declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_add_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_add_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
ret i8 %red
}
@@ -144,13 +173,16 @@ declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
ret i8 %red
}
@@ -159,13 +191,16 @@ declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
ret i8 %red
}
@@ -174,13 +209,16 @@ declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
ret i8 %red
}
@@ -189,13 +227,16 @@ declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
ret i8 %red
}
@@ -204,13 +245,16 @@ declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_and_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_and_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
ret i8 %red
}
@@ -219,13 +263,16 @@ declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_or_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_or_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
ret i8 %red
}
@@ -234,13 +281,16 @@ declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
ret i8 %red
}
@@ -249,13 +299,16 @@ declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_add_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_add_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
ret i8 %red
}
@@ -264,13 +317,16 @@ declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
ret i8 %red
}
@@ -279,13 +335,16 @@ declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
ret i8 %red
}
@@ -294,13 +353,16 @@ declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
ret i8 %red
}
@@ -309,13 +371,16 @@ declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
ret i8 %red
}
@@ -324,13 +389,16 @@ declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_and_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_and_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
ret i8 %red
}
@@ -339,13 +407,16 @@ declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_or_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_or_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
ret i8 %red
}
@@ -354,13 +425,16 @@ declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
+ %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
ret i8 %red
}
@@ -369,13 +443,16 @@ declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_add_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_add_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
ret i16 %red
}
@@ -383,15 +460,18 @@ define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
%red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+ %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
ret i16 %red
}
@@ -399,15 +479,18 @@ define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
%red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
+ %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
ret i16 %red
}
@@ -416,13 +499,16 @@ declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
ret i16 %red
}
@@ -431,13 +517,16 @@ declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
ret i16 %red
}
@@ -446,13 +535,16 @@ declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
ret i16 %red
}
@@ -461,13 +553,16 @@ declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
ret i16 %red
}
@@ -476,13 +571,16 @@ declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_and_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_and_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
ret i16 %red
}
@@ -491,13 +589,16 @@ declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_or_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_or_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
ret i16 %red
}
@@ -506,13 +607,16 @@ declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
ret i16 %red
}
@@ -521,13 +625,16 @@ declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_add_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_add_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
ret i16 %red
}
@@ -535,15 +642,18 @@ define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
%red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+ %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
ret i16 %red
}
@@ -551,15 +661,18 @@ define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
%red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
+ %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
ret i16 %red
}
@@ -568,13 +681,16 @@ declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
ret i16 %red
}
@@ -583,13 +699,16 @@ declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
ret i16 %red
}
@@ -598,13 +717,16 @@ declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
ret i16 %red
}
@@ -613,13 +735,16 @@ declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
ret i16 %red
}
@@ -628,13 +753,16 @@ declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_and_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_and_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
ret i16 %red
}
@@ -643,13 +771,16 @@ declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_or_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_or_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
ret i16 %red
}
@@ -658,13 +789,16 @@ declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
ret i16 %red
}
@@ -673,13 +807,16 @@ declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_add_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_add_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
ret i16 %red
}
@@ -687,15 +824,18 @@ define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
%red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+ %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
ret i16 %red
}
@@ -703,15 +843,18 @@ define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
%red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
+ %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
ret i16 %red
}
@@ -720,13 +863,16 @@ declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
ret i16 %red
}
@@ -735,13 +881,16 @@ declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
ret i16 %red
}
@@ -750,13 +899,16 @@ declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
ret i16 %red
}
@@ -765,13 +917,16 @@ declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
ret i16 %red
}
@@ -780,13 +935,16 @@ declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_and_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_and_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
ret i16 %red
}
@@ -795,13 +953,16 @@ declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_or_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_or_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
ret i16 %red
}
@@ -810,13 +971,16 @@ declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
+ %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
ret i16 %red
}
@@ -825,13 +989,16 @@ declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_add_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_add_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
ret i32 %red
}
@@ -839,15 +1006,18 @@ define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
%red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+ %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
ret i32 %red
}
@@ -855,15 +1025,18 @@ define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
%red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
+ %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
ret i32 %red
}
@@ -872,13 +1045,16 @@ declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
ret i32 %red
}
@@ -887,13 +1063,16 @@ declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
ret i32 %red
}
@@ -902,13 +1081,16 @@ declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
ret i32 %red
}
@@ -917,13 +1099,16 @@ declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
ret i32 %red
}
@@ -932,13 +1117,16 @@ declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_and_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_and_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
ret i32 %red
}
@@ -947,13 +1135,16 @@ declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_or_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_or_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
ret i32 %red
}
@@ -962,13 +1153,16 @@ declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
ret i32 %red
}
@@ -977,13 +1171,16 @@ declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_add_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_add_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
ret i32 %red
}
@@ -991,15 +1188,18 @@ define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
%red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+ %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1007,15 +1207,18 @@ define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
%red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
+ %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1024,13 +1227,16 @@ declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1039,13 +1245,16 @@ declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1054,13 +1263,16 @@ declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1069,13 +1281,16 @@ declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1084,13 +1299,16 @@ declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_and_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_and_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1099,13 +1317,16 @@ declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_or_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_or_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1114,13 +1335,16 @@ declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1129,13 +1353,16 @@ declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_add_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_add_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1143,15 +1370,18 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
%red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+ %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1159,15 +1389,18 @@ define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
%red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
+ %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1176,13 +1409,16 @@ declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1191,13 +1427,16 @@ declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1206,13 +1445,16 @@ declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1221,13 +1463,16 @@ declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1236,13 +1481,16 @@ declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_and_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_and_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1251,13 +1499,16 @@ declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_or_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_or_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1266,13 +1517,16 @@ declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
+ %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
ret i32 %red
}
@@ -1281,13 +1535,16 @@ declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_add_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_add_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1295,15 +1552,18 @@ define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
%red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+ %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1311,15 +1571,18 @@ define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
%red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
+ %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1328,13 +1591,16 @@ declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1343,13 +1609,16 @@ declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1358,13 +1627,16 @@ declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1373,13 +1645,16 @@ declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1388,13 +1663,16 @@ declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_and_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_and_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1403,13 +1681,16 @@ declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_or_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_or_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1418,13 +1699,16 @@ declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1433,13 +1717,16 @@ declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_add_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_add_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1447,15 +1734,18 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
%red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+ %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1463,15 +1753,18 @@ define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
%red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
+ %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1480,13 +1773,16 @@ declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1495,13 +1791,16 @@ declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1510,13 +1809,16 @@ declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1525,13 +1827,16 @@ declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1540,13 +1845,16 @@ declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_and_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_and_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1555,13 +1863,16 @@ declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_or_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_or_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1570,13 +1881,16 @@ declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1585,13 +1899,16 @@ declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_add_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_add_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1599,15 +1916,18 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
%red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+ %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1615,15 +1935,18 @@ define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
%red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
+ %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1632,13 +1955,16 @@ declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1647,13 +1973,16 @@ declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1662,13 +1991,16 @@ declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1677,13 +2009,16 @@ declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1692,13 +2027,16 @@ declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_and_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_and_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1707,13 +2045,16 @@ declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_or_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_or_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
ret i64 %red
}
@@ -1722,12 +2063,15 @@ declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
+ %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
ret i64 %red
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
index aa03b02895d5f5..228fa602be0bdb 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
-; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
-; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
+; RUN: | FileCheck %s --check-prefix=SIZE
define i32 @reduce_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_i1'
@@ -14,6 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
@@ -25,6 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
@@ -35,6 +53,15 @@ define i32 @reduce_i1(i32 %arg) {
%V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
%V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
%V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+
+ %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -48,6 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
@@ -59,6 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
@@ -69,6 +112,15 @@ define i32 @reduce_i8(i32 %arg) {
%V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
%V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
%V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
+
+ %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -82,6 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
@@ -93,6 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
@@ -103,6 +171,15 @@ define i32 @reduce_i16(i32 %arg) {
%V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
%V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
%V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
+
+ %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
@@ -116,6 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i32'
@@ -127,6 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
@@ -137,32 +230,19 @@ define i32 @reduce_i32(i32 %arg) {
%V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
%V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
%V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
+
+ %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
define i32 @reduce_i64(i32 %arg) {
-; CHECK-LABEL: 'reduce_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SIZE-LABEL: 'reduce_i64'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
%V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
@@ -171,6 +251,15 @@ define i32 @reduce_i64(i32 %arg) {
%V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
%V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
%V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+
+ %V1_vp = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+ %V2_vp = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+ %V4_vp = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+ %V8_vp = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+ %V16_vp = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+ %V32_vp = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+ %V64_vp = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
>From bca67aba598603cd82e349fed0bb368370fded09 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 29 Oct 2024 19:14:20 -0700
Subject: [PATCH 2/7] [RISCV][TTI] Implement instruction costs for vp.reduce.*.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 69 +++
.../Analysis/CostModel/RISCV/reduce-add.ll | 166 ++++---
.../Analysis/CostModel/RISCV/reduce-and.ll | 174 ++++---
.../Analysis/CostModel/RISCV/reduce-fadd.ll | 470 +++++++++++-------
.../CostModel/RISCV/reduce-fmaximum.ll | 224 ++++-----
.../CostModel/RISCV/reduce-fminimum.ll | 112 ++---
.../Analysis/CostModel/RISCV/reduce-fmul.ll | 254 +++++-----
.../Analysis/CostModel/RISCV/reduce-max.ll | 332 ++++++++-----
.../Analysis/CostModel/RISCV/reduce-min.ll | 332 ++++++++-----
.../Analysis/CostModel/RISCV/reduce-or.ll | 178 ++++---
.../CostModel/RISCV/reduce-scalable-fp.ll | 268 +++++-----
.../CostModel/RISCV/reduce-scalable-int.ll | 456 ++++++++---------
.../Analysis/CostModel/RISCV/reduce-xor.ll | 166 ++++---
.../CostModel/RISCV/rvv-intrinsics.ll | 64 +--
14 files changed, 1879 insertions(+), 1386 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 6344bc4664d3b6..9df72f201befc8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1191,6 +1191,75 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(),
ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
CostKind);
+ case Intrinsic::vp_reduce_add:
+ case Intrinsic::vp_reduce_fadd:
+ case Intrinsic::vp_reduce_mul:
+ case Intrinsic::vp_reduce_fmul:
+ case Intrinsic::vp_reduce_and:
+ case Intrinsic::vp_reduce_or:
+ case Intrinsic::vp_reduce_xor: {
+ unsigned Opcode;
+ switch (ICA.getID()) {
+ case Intrinsic::vp_reduce_add:
+ Opcode = Instruction::Add;
+ break;
+ case Intrinsic::vp_reduce_fadd:
+ Opcode = Instruction::FAdd;
+ break;
+ case Intrinsic::vp_reduce_mul:
+ Opcode = Instruction::Mul;
+ break;
+ case Intrinsic::vp_reduce_fmul:
+ Opcode = Instruction::FMul;
+ break;
+ case Intrinsic::vp_reduce_and:
+ Opcode = Instruction::And;
+ break;
+ case Intrinsic::vp_reduce_or:
+ Opcode = Instruction::Or;
+ break;
+ case Intrinsic::vp_reduce_xor:
+ Opcode = Instruction::Xor;
+ break;
+ }
+ return getArithmeticReductionCost(Opcode,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
+ }
+ case Intrinsic::vp_reduce_smax:
+ case Intrinsic::vp_reduce_smin:
+ case Intrinsic::vp_reduce_umax:
+ case Intrinsic::vp_reduce_umin:
+ case Intrinsic::vp_reduce_fmax:
+ case Intrinsic::vp_reduce_fmaximum:
+ case Intrinsic::vp_reduce_fmin:
+ case Intrinsic::vp_reduce_fminimum: {
+ unsigned IID;
+ switch (ICA.getID()) {
+ case Intrinsic::vp_reduce_smax:
+ IID = Intrinsic::smax;
+ break;
+ case Intrinsic::vp_reduce_smin:
+ IID = Intrinsic::smin;
+ break;
+ case Intrinsic::vp_reduce_umax:
+ IID = Intrinsic::umax;
+ break;
+ case Intrinsic::vp_reduce_umin:
+ IID = Intrinsic::umin;
+ break;
+ case Intrinsic::vp_reduce_fmax:
+ case Intrinsic::vp_reduce_fmaximum:
+ IID = Intrinsic::maximum;
+ break;
+ case Intrinsic::vp_reduce_fmin:
+ case Intrinsic::vp_reduce_fminimum:
+ IID = Intrinsic::minimum;
+ break;
+ }
+ return getMinMaxReductionCost(IID, cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
+ }
}
if (ST->hasVInstructions() && RetTy->isVectorTy()) {
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 1edcdecb923e57..70687da17eb1a5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -16,14 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
@@ -134,14 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
@@ -153,14 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
@@ -193,14 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i32'
@@ -212,14 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
@@ -243,6 +243,44 @@ define i32 @reduce_i32(i32 %arg) {
}
define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
%V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index f72298b8060631..76f2bd949c652c 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -19,17 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
@@ -44,17 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
@@ -93,13 +93,13 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
@@ -111,13 +111,13 @@ define i32 @reduce_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -149,14 +149,14 @@ define i32 @reduce_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
@@ -168,14 +168,14 @@ define i32 @reduce_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef)
@@ -208,14 +208,14 @@ define i32 @reduce_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i32'
@@ -227,14 +227,14 @@ define i32 @reduce_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef)
@@ -258,6 +258,44 @@ define i32 @reduce_i32(i32 %arg) {
}
define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
%V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index f1636785b90b81..32b62be3afedb2 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -20,13 +20,13 @@ define void @reduce_fadd_bfloat() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -51,13 +51,13 @@ define void @reduce_fadd_bfloat() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -114,20 +114,20 @@ define void @reduce_fadd_half() {
; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half'
@@ -146,13 +146,13 @@ define void @reduce_fadd_half() {
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -176,20 +176,20 @@ define void @reduce_fadd_half() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -239,20 +239,20 @@ define void @reduce_fadd_float() {
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fadd_float'
@@ -269,20 +269,20 @@ define void @reduce_fadd_float() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -330,20 +330,20 @@ define void @reduce_fadd_double() {
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fadd_double'
@@ -359,20 +359,20 @@ define void @reduce_fadd_double() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -408,6 +408,64 @@ define void @reduce_fadd_double() {
define void @reduce_ordered_fadd_bfloat() {
; FP-REDUCE-LABEL: 'reduce_ordered_fadd_bfloat'
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'reduce_ordered_fadd_bfloat'
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -444,6 +502,68 @@ define void @reduce_ordered_fadd_bfloat() {
define void @reduce_ordered_fadd_half() {
;
+; FP-REDUCE-ZVFH-LABEL: 'reduce_ordered_fadd_half'
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_ordered_fadd_half'
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
; SIZE-LABEL: 'reduce_ordered_fadd_half'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
@@ -459,20 +579,20 @@ define void @reduce_ordered_fadd_half() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -522,20 +642,20 @@ define void @reduce_ordered_fadd_float() {
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_ordered_fadd_float'
@@ -552,20 +672,20 @@ define void @reduce_ordered_fadd_float() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, <vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -613,20 +733,20 @@ define void @reduce_ordered_fadd_double() {
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'reduce_ordered_fadd_double'
@@ -642,20 +762,20 @@ define void @reduce_ordered_fadd_double() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, <vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index dedeb4be67ae82..6e33ccaaed5c7e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -22,34 +22,34 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -67,34 +67,34 @@ define float @reduce_fmaximum_f32(float %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -165,34 +165,34 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -208,34 +208,34 @@ define double @reduce_fmaximum_f64(double %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 6d51911b4fc408..d8fd6393039282 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -15,20 +15,20 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -39,20 +39,20 @@ define float @reduce_fmaximum_f32(float %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -96,20 +96,20 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -119,20 +119,20 @@ define double @reduce_fmaximum_f64(double %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
index dc43a54ff1855b..35a5343b6cc65d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
@@ -20,13 +20,13 @@ define void @reduce_fmul_bfloat() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -51,13 +51,13 @@ define void @reduce_fmul_bfloat() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -115,13 +115,13 @@ define void @reduce_fmul_half() {
; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -146,13 +146,13 @@ define void @reduce_fmul_half() {
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -177,13 +177,13 @@ define void @reduce_fmul_half() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -240,13 +240,13 @@ define void @reduce_fmul_float() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 121 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 451 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 483 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 547 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -270,13 +270,13 @@ define void @reduce_fmul_float() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -331,13 +331,13 @@ define void @reduce_fmul_double() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 361 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 457 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 585 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -360,13 +360,13 @@ define void @reduce_fmul_double() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -421,14 +421,14 @@ define void @reduce_ordered_fmul_bfloat() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -452,14 +452,14 @@ define void @reduce_ordered_fmul_bfloat() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -516,14 +516,14 @@ define void @reduce_ordered_fmul_half() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -547,14 +547,14 @@ define void @reduce_ordered_fmul_half() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -610,14 +610,14 @@ define void @reduce_ordered_fmul_float() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 254 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 508 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -639,14 +639,14 @@ define void @reduce_ordered_fmul_float() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 380 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -698,14 +698,14 @@ define void @reduce_ordered_fmul_double() {
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 285 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 827 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2679 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
@@ -725,14 +725,14 @@ define void @reduce_ordered_fmul_double() {
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 221 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 443 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 887 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index c037eb5bfc17f4..f77e94cd333aa5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -16,14 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umax_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_umax_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
@@ -133,14 +133,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umax_i16'
@@ -151,14 +151,14 @@ define i32 @reduce_umax_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef)
@@ -190,14 +190,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umax_i32'
@@ -209,14 +209,14 @@ define i32 @reduce_umax_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef)
@@ -240,6 +240,44 @@ define i32 @reduce_umax_i32(i32 %arg) {
}
define i32 @reduce_umax_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_umax_i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_umax_i64'
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
%V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
@@ -270,14 +308,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i1'
@@ -289,14 +327,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef)
@@ -329,14 +367,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smax_i8'
@@ -348,14 +386,14 @@ define i32 @reduce_smax_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
@@ -388,14 +426,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smax_i16'
@@ -407,14 +445,14 @@ define i32 @reduce_smax_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef)
@@ -447,14 +485,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smax_i32'
@@ -466,14 +504,14 @@ define i32 @reduce_smax_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef)
@@ -497,6 +535,44 @@ define i32 @reduce_smax_i32(i32 %arg) {
}
define i32 @reduce_smax_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_smax_i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_smax_i64'
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
%V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 858c053f0c3ef3..3e6a19e86a904a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -16,14 +16,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_umin_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_umin_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
@@ -134,14 +134,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i16'
@@ -153,14 +153,14 @@ define i32 @reduce_umin_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef)
@@ -193,14 +193,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_umin_i32'
@@ -212,14 +212,14 @@ define i32 @reduce_umin_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef)
@@ -243,6 +243,44 @@ define i32 @reduce_umin_i32(i32 %arg) {
}
define i32 @reduce_umin_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_umin_i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_umin_i64'
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
%V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
@@ -273,14 +311,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i1'
@@ -292,14 +330,14 @@ define i32 @reduce_smin_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> undef)
@@ -332,14 +370,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i8'
@@ -351,14 +389,14 @@ define i32 @reduce_smin_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
@@ -391,14 +429,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i16'
@@ -410,14 +448,14 @@ define i32 @reduce_smin_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef)
@@ -450,14 +488,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_smin_i32'
@@ -469,14 +507,14 @@ define i32 @reduce_smin_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef)
@@ -500,6 +538,44 @@ define i32 @reduce_smin_i32(i32 %arg) {
}
define i32 @reduce_smin_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_smin_i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_smin_i64'
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
%V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index 2db77240fc3fcf..69805e79641011 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -19,17 +19,17 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14076 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 46584 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 166896 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
@@ -44,17 +44,17 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3324 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10744 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 37872 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
@@ -93,14 +93,14 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
@@ -112,14 +112,14 @@ define i32 @reduce_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
@@ -152,14 +152,14 @@ define i32 @reduce_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
@@ -171,14 +171,14 @@ define i32 @reduce_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef)
@@ -211,14 +211,14 @@ define i32 @reduce_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i32'
@@ -230,14 +230,14 @@ define i32 @reduce_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef)
@@ -261,6 +261,44 @@ define i32 @reduce_i32(i32 %arg) {
}
define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
%V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 7f1ff31f594348..134f75d6b4692d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -11,12 +11,12 @@ declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv1f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv1f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -27,12 +27,12 @@ define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v)
@@ -45,12 +45,12 @@ declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv2f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv2f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -61,12 +61,12 @@ define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v)
@@ -79,12 +79,12 @@ declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv4f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv4f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
@@ -95,12 +95,12 @@ define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v)
@@ -113,12 +113,12 @@ declare float @llvm.vector.reduce.fadd.nxv1f32(float, <vscale x 1 x float>)
define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -129,12 +129,12 @@ define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
@@ -146,13 +146,13 @@ define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
@@ -165,13 +165,13 @@ define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
@@ -185,12 +185,12 @@ declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
@@ -201,12 +201,12 @@ define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
@@ -218,13 +218,13 @@ define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
@@ -237,13 +237,13 @@ define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 2 x half> %v to <vscale x 2 x float>
@@ -257,12 +257,12 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -273,12 +273,12 @@ define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v)
@@ -290,13 +290,13 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
@@ -309,13 +309,13 @@ define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
@@ -329,12 +329,12 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>)
define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
@@ -345,12 +345,12 @@ define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v)
@@ -362,13 +362,13 @@ define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
@@ -381,13 +381,13 @@ define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
@@ -401,12 +401,12 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -417,12 +417,12 @@ define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v)
@@ -434,13 +434,13 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
@@ -453,13 +453,13 @@ define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
@@ -473,12 +473,12 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_fadd_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fadd_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -489,12 +489,12 @@ define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
@@ -506,13 +506,13 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_fwadd_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fwadd_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
@@ -525,13 +525,13 @@ define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
@@ -545,12 +545,12 @@ declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)
define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -561,12 +561,12 @@ define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -577,12 +577,12 @@ define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -595,12 +595,12 @@ declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv2f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv2f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
@@ -613,12 +613,12 @@ declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv4f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv4f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
@@ -631,12 +631,12 @@ declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv64f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv64f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
@@ -649,12 +649,12 @@ declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)
define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -665,12 +665,12 @@ define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -681,12 +681,12 @@ define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -699,12 +699,12 @@ declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
@@ -717,12 +717,12 @@ declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
@@ -735,12 +735,12 @@ declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv32f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv32f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
@@ -753,12 +753,12 @@ declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)
define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -769,12 +769,12 @@ define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -785,12 +785,12 @@ define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -803,12 +803,12 @@ declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)
define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
@@ -821,12 +821,12 @@ declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
@@ -839,12 +839,12 @@ declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv16f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv16f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
@@ -857,12 +857,12 @@ declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)
define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -873,12 +873,12 @@ define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -889,12 +889,12 @@ define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -907,12 +907,12 @@ declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv2f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv2f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
@@ -925,12 +925,12 @@ declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv4f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv4f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
@@ -943,12 +943,12 @@ declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv64f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv64f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
@@ -961,12 +961,12 @@ declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)
define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -977,12 +977,12 @@ define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -993,12 +993,12 @@ define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -1011,12 +1011,12 @@ declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
@@ -1029,12 +1029,12 @@ declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
@@ -1047,12 +1047,12 @@ declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv32f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv32f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
@@ -1065,12 +1065,12 @@ declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1081,12 +1081,12 @@ define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1097,12 +1097,12 @@ define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1115,12 +1115,12 @@ declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)
define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
@@ -1133,12 +1133,12 @@ declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
@@ -1151,12 +1151,12 @@ declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv16f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv16f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
@@ -1167,12 +1167,12 @@ define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: 'vreduce_nsz_fadd_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_nsz_fadd_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index b565cc9ac3af4a..6cd817a93552b7 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -11,12 +11,12 @@ declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_add_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_add_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
@@ -29,12 +29,12 @@ declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
@@ -47,12 +47,12 @@ declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
@@ -65,12 +65,12 @@ declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
@@ -83,12 +83,12 @@ declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
@@ -101,12 +101,12 @@ declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_and_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_and_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
@@ -119,12 +119,12 @@ declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_or_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_or_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
@@ -137,12 +137,12 @@ declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)
define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, <vscale x 1 x i8> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
@@ -155,12 +155,12 @@ declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_add_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_add_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
@@ -173,12 +173,12 @@ declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
@@ -191,12 +191,12 @@ declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
@@ -209,12 +209,12 @@ declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
@@ -227,12 +227,12 @@ declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
@@ -245,12 +245,12 @@ declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_and_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_and_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
@@ -263,12 +263,12 @@ declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_or_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_or_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
@@ -281,12 +281,12 @@ declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)
define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, <vscale x 2 x i8> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
@@ -299,12 +299,12 @@ declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_add_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_add_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
@@ -317,12 +317,12 @@ declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
@@ -335,12 +335,12 @@ declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
@@ -353,12 +353,12 @@ declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
@@ -371,12 +371,12 @@ declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
@@ -389,12 +389,12 @@ declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_and_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_and_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
@@ -407,12 +407,12 @@ declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_or_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_or_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
@@ -425,12 +425,12 @@ declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)
define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, <vscale x 4 x i8> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red
;
%red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
@@ -443,12 +443,12 @@ declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_add_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_add_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
@@ -460,13 +460,13 @@ define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
@@ -479,13 +479,13 @@ define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv1i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv1i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, <vscale x 1 x i16> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
@@ -499,12 +499,12 @@ declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
@@ -517,12 +517,12 @@ declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
@@ -535,12 +535,12 @@ declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
@@ -553,12 +553,12 @@ declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
@@ -571,12 +571,12 @@ declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_and_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_and_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
@@ -589,12 +589,12 @@ declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_or_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_or_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
@@ -607,12 +607,12 @@ declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)
define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, <vscale x 1 x i16> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
@@ -625,12 +625,12 @@ declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_add_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_add_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
@@ -642,13 +642,13 @@ define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
@@ -661,13 +661,13 @@ define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv2i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv2i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, <vscale x 2 x i16> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
@@ -681,12 +681,12 @@ declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
@@ -699,12 +699,12 @@ declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
@@ -717,12 +717,12 @@ declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
@@ -735,12 +735,12 @@ declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
@@ -753,12 +753,12 @@ declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_and_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_and_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
@@ -771,12 +771,12 @@ declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_or_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_or_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
@@ -789,12 +789,12 @@ declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, <vscale x 2 x i16> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
@@ -807,12 +807,12 @@ declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_add_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_add_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
@@ -824,13 +824,13 @@ define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
@@ -843,13 +843,13 @@ define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv4i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv4i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, <vscale x 4 x i16> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
@@ -863,12 +863,12 @@ declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
@@ -881,12 +881,12 @@ declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
@@ -899,12 +899,12 @@ declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
@@ -917,12 +917,12 @@ declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
@@ -935,12 +935,12 @@ declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_and_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_and_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
@@ -953,12 +953,12 @@ declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_or_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_or_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
@@ -971,12 +971,12 @@ declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)
define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, <vscale x 4 x i16> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red
;
%red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
@@ -989,12 +989,12 @@ declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_add_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_add_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
@@ -1006,13 +1006,13 @@ define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
@@ -1025,13 +1025,13 @@ define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv1i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv1i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, <vscale x 1 x i32> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
@@ -1045,12 +1045,12 @@ declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
@@ -1063,12 +1063,12 @@ declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
@@ -1081,12 +1081,12 @@ declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
@@ -1099,12 +1099,12 @@ declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
@@ -1117,12 +1117,12 @@ declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_and_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_and_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
@@ -1135,12 +1135,12 @@ declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_or_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_or_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
@@ -1153,12 +1153,12 @@ declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)
define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, <vscale x 1 x i32> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
@@ -1171,12 +1171,12 @@ declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_add_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_add_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
@@ -1188,13 +1188,13 @@ define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
@@ -1207,13 +1207,13 @@ define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv2i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv2i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
@@ -1227,12 +1227,12 @@ declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
@@ -1245,12 +1245,12 @@ declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
@@ -1263,12 +1263,12 @@ declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
@@ -1281,12 +1281,12 @@ declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
@@ -1299,12 +1299,12 @@ declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_and_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_and_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
@@ -1317,12 +1317,12 @@ declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_or_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_or_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
@@ -1335,12 +1335,12 @@ declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)
define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, <vscale x 2 x i32> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
@@ -1353,12 +1353,12 @@ declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_add_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_add_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
@@ -1370,13 +1370,13 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
@@ -1389,13 +1389,13 @@ define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv4i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv4i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, <vscale x 4 x i32> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
@@ -1409,12 +1409,12 @@ declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1427,12 +1427,12 @@ declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
@@ -1445,12 +1445,12 @@ declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1463,12 +1463,12 @@ declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
@@ -1481,12 +1481,12 @@ declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_and_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_and_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
@@ -1499,12 +1499,12 @@ declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_or_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_or_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
@@ -1517,12 +1517,12 @@ declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, <vscale x 4 x i32> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red
;
%red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
@@ -1535,12 +1535,12 @@ declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_add_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_add_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
@@ -1552,13 +1552,13 @@ define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
@@ -1571,13 +1571,13 @@ define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv1i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv1i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, <vscale x 1 x i64> %e, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
@@ -1591,12 +1591,12 @@ declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
@@ -1609,12 +1609,12 @@ declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
@@ -1627,12 +1627,12 @@ declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
@@ -1645,12 +1645,12 @@ declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
@@ -1663,12 +1663,12 @@ declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_and_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_and_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
@@ -1681,12 +1681,12 @@ declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_or_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_or_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
@@ -1699,12 +1699,12 @@ declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)
define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv1i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv1i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, <vscale x 1 x i64> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
@@ -1717,12 +1717,12 @@ declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_add_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_add_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
@@ -1734,13 +1734,13 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
@@ -1753,13 +1753,13 @@ define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv2i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv2i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> %e, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
@@ -1773,12 +1773,12 @@ declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
@@ -1791,12 +1791,12 @@ declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
@@ -1809,12 +1809,12 @@ declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
@@ -1827,12 +1827,12 @@ declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
@@ -1845,12 +1845,12 @@ declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_and_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_and_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
@@ -1863,12 +1863,12 @@ declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_or_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_or_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
@@ -1881,12 +1881,12 @@ declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv2i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv2i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, <vscale x 2 x i64> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
@@ -1899,12 +1899,12 @@ declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_add_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_add_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
@@ -1916,13 +1916,13 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vwreduce_add_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_add_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
@@ -1935,13 +1935,13 @@ define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: 'vwreduce_uadd_nxv4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vwreduce_uadd_nxv4i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> %e, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
@@ -1955,12 +1955,12 @@ declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_umax_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umax_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
@@ -1973,12 +1973,12 @@ declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_smax_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smax_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
@@ -1991,12 +1991,12 @@ declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_umin_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_umin_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
@@ -2009,12 +2009,12 @@ declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_smin_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_smin_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
@@ -2027,12 +2027,12 @@ declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_and_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_and_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
@@ -2045,12 +2045,12 @@ declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_or_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_or_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
@@ -2063,12 +2063,12 @@ declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: 'vreduce_xor_nxv4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red
;
; SIZE-LABEL: 'vreduce_xor_nxv4i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
-; SIZE-NEXT: Cost Model: Invalid cost for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, <vscale x 4 x i64> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red
;
%red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
index 228fa602be0bdb..1ea5bcdf8ef9d9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -16,14 +16,14 @@ define i32 @reduce_i1(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 414 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1342 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4734 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
@@ -35,14 +35,14 @@ define i32 @reduce_i1(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1150 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
@@ -75,14 +75,14 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2686 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
@@ -94,14 +94,14 @@ define i32 @reduce_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 894 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
@@ -134,14 +134,14 @@ define i32 @reduce_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 830 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2685 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
@@ -153,14 +153,14 @@ define i32 @reduce_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 893 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef)
@@ -193,14 +193,14 @@ define i32 @reduce_i32(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 829 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2683 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i32'
@@ -212,14 +212,14 @@ define i32 @reduce_i32(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 445 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 891 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef)
@@ -243,6 +243,44 @@ define i32 @reduce_i32(i32 %arg) {
}
define i32 @reduce_i64(i32 %arg) {
+; CHECK-LABEL: 'reduce_i64'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'reduce_i64'
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
%V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
%V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
%V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
index bb98508f239c1b..869e51966e092a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
@@ -1218,37 +1218,37 @@ define void @reduce_add() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPEBASED-LABEL: 'reduce_add'
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %6 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %14 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %20 = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %22 = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %28 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %30 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
@@ -1324,37 +1324,37 @@ define void @reduce_fadd() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPEBASED-LABEL: 'reduce_fadd'
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vector.reduce.fadd.v2f32(float undef, <2 x float> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %6 = call float @llvm.vector.reduce.fadd.v8f32(float undef, <8 x float> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %7 = call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %7 = call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %8 = call float @llvm.vector.reduce.fadd.v16f32(float undef, <16 x float> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call double @llvm.vector.reduce.fadd.v2f64(double undef, <2 x double> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %12 = call double @llvm.vector.reduce.fadd.v4f64(double undef, <4 x double> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %14 = call double @llvm.vector.reduce.fadd.v8f64(double undef, <8 x double> undef)
-; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %16 = call double @llvm.vector.reduce.fadd.v16f64(double undef, <16 x double> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vector.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %20 = call float @llvm.vector.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %22 = call float @llvm.vector.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vector.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %28 = call double @llvm.vector.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %30 = call double @llvm.vector.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef)
-; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
>From ba2534fe8db5c2ea25de335aab2d72372fb59e70 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 29 Oct 2024 21:58:43 -0700
Subject: [PATCH 3/7] Fix missing tests
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 4 +
.../Analysis/CostModel/RISCV/reduce-and.ll | 3 +
.../CostModel/RISCV/reduce-fmaximum.ll | 112 +++++++-------
.../CostModel/RISCV/reduce-fminimum.ll | 56 +++----
.../Analysis/CostModel/RISCV/reduce-max.ll | 3 +
.../CostModel/RISCV/reduce-scalable-fp.ll | 144 +++++++++---------
6 files changed, 166 insertions(+), 156 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 9df72f201befc8..dc7165bf1cc364 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1249,10 +1249,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
IID = Intrinsic::umin;
break;
case Intrinsic::vp_reduce_fmax:
+ IID = Intrinsic::maxnum;
+ break;
case Intrinsic::vp_reduce_fmaximum:
IID = Intrinsic::maximum;
break;
case Intrinsic::vp_reduce_fmin:
+ IID = Intrinsic::minnum;
+ break;
case Intrinsic::vp_reduce_fminimum:
IID = Intrinsic::minimum;
break;
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 76f2bd949c652c..47ae0bfa58cf9a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -100,6 +100,7 @@ define i32 @reduce_i8(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
@@ -118,6 +119,7 @@ define i32 @reduce_i8(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
@@ -136,6 +138,7 @@ define i32 @reduce_i8(i32 %arg) {
%V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
%V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef)
%V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef)
+ %V128_vp = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef)
ret i32 undef
}
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index 6e33ccaaed5c7e..c28fdfbfddac1a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -36,20 +36,20 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -81,20 +81,20 @@ define float @reduce_fmaximum_f32(float %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef)
@@ -179,20 +179,20 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -222,20 +222,20 @@ define double @reduce_fmaximum_f64(double %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index d8fd6393039282..99c9f5d89f6963 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -22,13 +22,13 @@ define float @reduce_fmaximum_f32(float %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef
;
; SIZE-LABEL: 'reduce_fmaximum_f32'
@@ -46,13 +46,13 @@ define float @reduce_fmaximum_f32(float %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef
;
%V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef)
@@ -103,13 +103,13 @@ define double @reduce_fmaximum_f64(double %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef
;
; SIZE-LABEL: 'reduce_fmaximum_f64'
@@ -126,13 +126,13 @@ define double @reduce_fmaximum_f64(double %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef
;
%V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index f77e94cd333aa5..e0f98a759cc89b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -133,6 +133,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
@@ -151,6 +152,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
@@ -168,6 +170,7 @@ define i32 @reduce_umax_i16(i32 %arg) {
%V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
%V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
%V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+ %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef)
%V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef)
%V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 134f75d6b4692d..74dbcfae93f858 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -545,12 +545,12 @@ declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)
define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -561,12 +561,12 @@ define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -577,12 +577,12 @@ define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) {
define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
@@ -595,12 +595,12 @@ declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv2f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv2f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
@@ -613,12 +613,12 @@ declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv4f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv4f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
@@ -631,12 +631,12 @@ declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv64f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv64f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
@@ -649,12 +649,12 @@ declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)
define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -665,12 +665,12 @@ define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -681,12 +681,12 @@ define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
@@ -699,12 +699,12 @@ declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
@@ -717,12 +717,12 @@ declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
@@ -735,12 +735,12 @@ declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv32f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv32f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
@@ -753,12 +753,12 @@ declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)
define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -769,12 +769,12 @@ define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -785,12 +785,12 @@ define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
@@ -803,12 +803,12 @@ declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)
define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
@@ -821,12 +821,12 @@ declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
@@ -839,12 +839,12 @@ declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: 'vreduce_fmin_nxv16f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmin_nxv16f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
@@ -857,12 +857,12 @@ declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)
define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -873,12 +873,12 @@ define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -889,12 +889,12 @@ define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) {
define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, <vscale x 1 x half> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
@@ -907,12 +907,12 @@ declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv2f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv2f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, <vscale x 2 x half> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
@@ -925,12 +925,12 @@ declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv4f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv4f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, <vscale x 4 x half> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
@@ -943,12 +943,12 @@ declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv64f16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv64f16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, <vscale x 64 x half> %v, <vscale x 64 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red
;
%red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
@@ -961,12 +961,12 @@ declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)
define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -977,12 +977,12 @@ define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -993,12 +993,12 @@ define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, <vscale x 1 x float> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
@@ -1011,12 +1011,12 @@ declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv2f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv2f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, <vscale x 2 x float> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
@@ -1029,12 +1029,12 @@ declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv4f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv4f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
@@ -1047,12 +1047,12 @@ declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv32f32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv32f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, <vscale x 32 x float> %v, <vscale x 32 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red
;
%red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
@@ -1065,12 +1065,12 @@ declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1081,12 +1081,12 @@ define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1097,12 +1097,12 @@ define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, <vscale x 1 x double> %v, <vscale x 1 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
@@ -1115,12 +1115,12 @@ declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)
define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv2f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, <vscale x 2 x double> %v, <vscale x 2 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
@@ -1133,12 +1133,12 @@ declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv4f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv4f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, <vscale x 4 x double> %v, <vscale x 4 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
@@ -1151,12 +1151,12 @@ declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: 'vreduce_fmax_nxv16f64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red
;
; SIZE-LABEL: 'vreduce_fmax_nxv16f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, <vscale x 16 x double> %v, <vscale x 16 x i1> undef, i32 undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red
;
%red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
>From 3cb1b3de68e4089ea7f6b9e696368137a12a9aec Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 30 Oct 2024 09:38:37 -0700
Subject: [PATCH 4/7] Fixup! early return
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 103 ++++++++----------
1 file changed, 45 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index dc7165bf1cc364..bdb8d031fad31f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1192,78 +1192,65 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
CostKind);
case Intrinsic::vp_reduce_add:
+ return getArithmeticReductionCost(Instruction::Add,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ std::nullopt, CostKind);
case Intrinsic::vp_reduce_fadd:
+ return getArithmeticReductionCost(Instruction::FAdd,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_mul:
+ return getArithmeticReductionCost(Instruction::Mul,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ std::nullopt, CostKind);
case Intrinsic::vp_reduce_fmul:
+ return getArithmeticReductionCost(Instruction::FMul,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_and:
+ return getArithmeticReductionCost(Instruction::And,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ std::nullopt, CostKind);
case Intrinsic::vp_reduce_or:
- case Intrinsic::vp_reduce_xor: {
- unsigned Opcode;
- switch (ICA.getID()) {
- case Intrinsic::vp_reduce_add:
- Opcode = Instruction::Add;
- break;
- case Intrinsic::vp_reduce_fadd:
- Opcode = Instruction::FAdd;
- break;
- case Intrinsic::vp_reduce_mul:
- Opcode = Instruction::Mul;
- break;
- case Intrinsic::vp_reduce_fmul:
- Opcode = Instruction::FMul;
- break;
- case Intrinsic::vp_reduce_and:
- Opcode = Instruction::And;
- break;
- case Intrinsic::vp_reduce_or:
- Opcode = Instruction::Or;
- break;
- case Intrinsic::vp_reduce_xor:
- Opcode = Instruction::Xor;
- break;
- }
- return getArithmeticReductionCost(Opcode,
+ return getArithmeticReductionCost(Instruction::Or,
cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
- }
+ std::nullopt, CostKind);
+ case Intrinsic::vp_reduce_xor:
+ return getArithmeticReductionCost(Instruction::Xor,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ std::nullopt, CostKind);
case Intrinsic::vp_reduce_smax:
+ return getMinMaxReductionCost(Intrinsic::smax,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_smin:
+ return getMinMaxReductionCost(Intrinsic::smin,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_umax:
+ return getMinMaxReductionCost(Intrinsic::umax,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_umin:
+ return getMinMaxReductionCost(Intrinsic::umin,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_fmax:
+ return getMinMaxReductionCost(Intrinsic::maxnum,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_fmaximum:
+ return getMinMaxReductionCost(Intrinsic::maximum,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_fmin:
- case Intrinsic::vp_reduce_fminimum: {
- unsigned IID;
- switch (ICA.getID()) {
- case Intrinsic::vp_reduce_smax:
- IID = Intrinsic::smax;
- break;
- case Intrinsic::vp_reduce_smin:
- IID = Intrinsic::smin;
- break;
- case Intrinsic::vp_reduce_umax:
- IID = Intrinsic::umax;
- break;
- case Intrinsic::vp_reduce_umin:
- IID = Intrinsic::umin;
- break;
- case Intrinsic::vp_reduce_fmax:
- IID = Intrinsic::maxnum;
- break;
- case Intrinsic::vp_reduce_fmaximum:
- IID = Intrinsic::maximum;
- break;
- case Intrinsic::vp_reduce_fmin:
- IID = Intrinsic::minnum;
- break;
- case Intrinsic::vp_reduce_fminimum:
- IID = Intrinsic::minimum;
- break;
- }
- return getMinMaxReductionCost(IID, cast<VectorType>(ICA.getArgTypes()[1]),
+ return getMinMaxReductionCost(Intrinsic::minnum,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
+ case Intrinsic::vp_reduce_fminimum:
+ return getMinMaxReductionCost(Intrinsic::minimum,
+ cast<VectorType>(ICA.getArgTypes()[1]),
ICA.getFlags(), CostKind);
- }
}
if (ST->hasVInstructions() && RetTy->isVectorTy()) {
>From 93cbca811644a7856ce677d1d1f5fd4c4203a792 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 30 Oct 2024 18:38:08 -0700
Subject: [PATCH 5/7] Fixup! using helper function to get the reduction opcode.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 64 ++++++-------------
1 file changed, 19 insertions(+), 45 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index bdb8d031fad31f..2bbb3c4e7bc1ba 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1192,66 +1192,40 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
CostKind);
case Intrinsic::vp_reduce_add:
- return getArithmeticReductionCost(Instruction::Add,
- cast<VectorType>(ICA.getArgTypes()[1]),
- std::nullopt, CostKind);
case Intrinsic::vp_reduce_fadd:
- return getArithmeticReductionCost(Instruction::FAdd,
- cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_mul:
- return getArithmeticReductionCost(Instruction::Mul,
- cast<VectorType>(ICA.getArgTypes()[1]),
- std::nullopt, CostKind);
case Intrinsic::vp_reduce_fmul:
- return getArithmeticReductionCost(Instruction::FMul,
- cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_and:
- return getArithmeticReductionCost(Instruction::And,
- cast<VectorType>(ICA.getArgTypes()[1]),
- std::nullopt, CostKind);
case Intrinsic::vp_reduce_or:
- return getArithmeticReductionCost(Instruction::Or,
- cast<VectorType>(ICA.getArgTypes()[1]),
- std::nullopt, CostKind);
- case Intrinsic::vp_reduce_xor:
- return getArithmeticReductionCost(Instruction::Xor,
- cast<VectorType>(ICA.getArgTypes()[1]),
- std::nullopt, CostKind);
+ case Intrinsic::vp_reduce_xor: {
+ std::optional<Intrinsic::ID> RedID =
+ VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
+ assert(RedID.has_value());
+ unsigned RedOp = getArithmeticReductionInstruction(*RedID);
+ if (RedOp == Instruction::FAdd || RedOp == Instruction::FMul)
+ return getArithmeticReductionCost(RedOp,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
+ return getArithmeticReductionCost(
+ RedOp, cast<VectorType>(ICA.getArgTypes()[1]), std::nullopt, CostKind);
+ }
case Intrinsic::vp_reduce_smax:
- return getMinMaxReductionCost(Intrinsic::smax,
- cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_smin:
- return getMinMaxReductionCost(Intrinsic::smin,
- cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_umax:
- return getMinMaxReductionCost(Intrinsic::umax,
- cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_umin:
- return getMinMaxReductionCost(Intrinsic::umin,
- cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_fmax:
- return getMinMaxReductionCost(Intrinsic::maxnum,
- cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_fmaximum:
- return getMinMaxReductionCost(Intrinsic::maximum,
- cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
case Intrinsic::vp_reduce_fmin:
- return getMinMaxReductionCost(Intrinsic::minnum,
- cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
- case Intrinsic::vp_reduce_fminimum:
- return getMinMaxReductionCost(Intrinsic::minimum,
+ case Intrinsic::vp_reduce_fminimum: {
+ std::optional<Intrinsic::ID> RedID =
+ VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
+ assert(RedID.has_value());
+ Intrinsic::ID MinMaxID = getMinMaxReductionIntrinsicOp(*RedID);
+ return getMinMaxReductionCost(MinMaxID,
cast<VectorType>(ICA.getArgTypes()[1]),
ICA.getFlags(), CostKind);
}
+ }
if (ST->hasVInstructions() && RetTy->isVectorTy()) {
if (auto LT = getTypeLegalizationCost(RetTy);
>From 5c41bc3cfd63073f8ae4d9efcb36f66af616c09a Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 5 Nov 2024 18:10:24 -0800
Subject: [PATCH 6/7] Fixup! Revert changes of original run lines and add new
runs for type-based query.
The new RUNs check that the instruction costs from type-based queries are the
same as those from normal queries.
---
llvm/test/Analysis/CostModel/RISCV/reduce-add.ll | 5 +++++
llvm/test/Analysis/CostModel/RISCV/reduce-and.ll | 5 +++++
llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll | 4 ++++
llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll | 5 +++++
llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll | 5 +++++
llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll | 4 ++++
llvm/test/Analysis/CostModel/RISCV/reduce-max.ll | 5 +++++
llvm/test/Analysis/CostModel/RISCV/reduce-min.ll | 5 +++++
llvm/test/Analysis/CostModel/RISCV/reduce-or.ll | 5 +++++
llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll | 5 +++++
llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll | 5 +++++
llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll | 5 +++++
12 files changed, 58 insertions(+)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
index 70687da17eb1a5..3a1d361f428a9a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
index 47ae0bfa58cf9a..5b0384523bccfc 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index 32b62be3afedb2..d1d5934b8a8f16 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -1,4 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefix=SIZE
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
index c28fdfbfddac1a..ec6356896b8c2d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
index 99c9f5d89f6963..9a6918ff51b3bb 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
index 35a5343b6cc65d..4f702691f92ebf 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
@@ -1,4 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s --check-prefix=SIZE
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
index e0f98a759cc89b..42462d2479fe77 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
index 3e6a19e86a904a..8b0cfa6ddda8c6 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
index 69805e79641011..683aaaa7a20ed9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 74dbcfae93f858..659639bdc1e2e9 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
index 6cd817a93552b7..9a2d1baa583e12 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
index 1ea5bcdf8ef9d9..8a85c8b7817374 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll
@@ -1,4 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
+; Check if type-based queries have same instruction cost.
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output --type-based-intrinsic-cost=true | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output --type-based-intrinsic-cost=true \
>From 9f5ffa6a33a663fb7af3fb4f72f607b6a64e318a Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 6 Nov 2024 21:34:07 -0800
Subject: [PATCH 7/7] Fixup! Passing FMF flags.
---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2bbb3c4e7bc1ba..5d21bb611df4e4 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1202,12 +1202,9 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
assert(RedID.has_value());
unsigned RedOp = getArithmeticReductionInstruction(*RedID);
- if (RedOp == Instruction::FAdd || RedOp == Instruction::FMul)
- return getArithmeticReductionCost(RedOp,
- cast<VectorType>(ICA.getArgTypes()[1]),
- ICA.getFlags(), CostKind);
- return getArithmeticReductionCost(
- RedOp, cast<VectorType>(ICA.getArgTypes()[1]), std::nullopt, CostKind);
+ return getArithmeticReductionCost(RedOp,
+ cast<VectorType>(ICA.getArgTypes()[1]),
+ ICA.getFlags(), CostKind);
}
case Intrinsic::vp_reduce_smax:
case Intrinsic::vp_reduce_smin:
More information about the llvm-commits mailing list