[llvm] f562e2a - [ARM] Update a number of reduction tests to use -cost-kind=all. NFC

Tue Sep 16 13:50:43 PDT 2025

Author: David Green
Date: 2025-09-16T21:50:38+01:00
New Revision: f562e2a6f5b362f0deabd0234dfdeb702c9b2d5e

URL: https://github.com/llvm/llvm-project/commit/f562e2a6f5b362f0deabd0234dfdeb702c9b2d5e
DIFF: https://github.com/llvm/llvm-project/commit/f562e2a6f5b362f0deabd0234dfdeb702c9b2d5e.diff

LOG: [ARM] Update a number of reduction tests to use -cost-kind=all. NFC

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/ARM/reduce-add.ll
    llvm/test/Analysis/CostModel/ARM/reduce-and.ll
    llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
    llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll
    llvm/test/Analysis/CostModel/ARM/reduce-fp.ll
    llvm/test/Analysis/CostModel/ARM/reduce-or.ll
    llvm/test/Analysis/CostModel/ARM/reduce-smax.ll
    llvm/test/Analysis/CostModel/ARM/reduce-smin.ll
    llvm/test/Analysis/CostModel/ARM/reduce-umax.ll
    llvm/test/Analysis/CostModel/ARM/reduce-umin.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll
index 4db9d1bb0efaf..6e4d4e3ec515f 100644

--- a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll
@@ -1,43 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @reduce_i64(i32 %arg) {
-; V8M-RECIP-LABEL: 'reduce_i64'
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-LABEL: 'reduce_i64'
+; V8M-NEXT:  Cost Model: Found costs of 2 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of 8 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of 20 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of 44 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of 92 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
-; NEON-RECIP-LABEL: 'reduce_i64'
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-LABEL: 'reduce_i64'
+; NEON-NEXT:  Cost Model: Found costs of 3 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 29 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:55 CodeSize:54 Lat:54 SizeLat:54 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:107 CodeSize:103 Lat:103 SizeLat:103 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
-; V8M-SIZE-LABEL: 'reduce_i64'
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-SIZE-LABEL: 'reduce_i64'
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'reduce_i64'
+; MVE-NEXT:  Cost Model: Found costs of 8 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V1  = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
   %V2  = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
@@ -48,45 +39,35 @@ define i32 @reduce_i64(i32 %arg) {
 }
 
 define i32 @reduce_i32(i32 %arg) {
-; V8M-RECIP-LABEL: 'reduce_i32'
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
-; V8M-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; NEON-RECIP-LABEL: 'reduce_i32'
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1070 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
-; NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; V8M-LABEL: 'reduce_i32'
+; V8M-NEXT:  Cost Model: Found costs of 4 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 10 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 22 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 46 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 94 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 190 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 382 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
-; V8M-SIZE-LABEL: 'reduce_i32'
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; NEON-LABEL: 'reduce_i32'
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 150 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 391 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 488 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:682 CodeSize:681 Lat:681 SizeLat:681 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:1070 CodeSize:1066 Lat:1066 SizeLat:1066 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
-; NEON-SIZE-LABEL: 'reduce_i32'
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 681 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1066 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; MVE-LABEL: 'reduce_i32'
+; MVE-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2   = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
   %V4   = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
@@ -97,30 +78,3 @@ define i32 @reduce_i32(i32 %arg) {
   %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
   ret i32 undef
 }
-
-declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
-declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
-declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
-declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
-
-declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
-declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
-
-declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
-declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
-declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
-
-declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
-declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
-declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-and.ll b/llvm/test/Analysis/CostModel/ARM/reduce-and.ll
index a220d0bacfa61..8a4407b361909 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-and.ll
@@ -1,19 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @reduce_i1(i32 %arg) {
-; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 97 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 193 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 385 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; V8M-LABEL: 'reduce_i1'
+; V8M-NEXT:  Cost Model: Found costs of 1 for: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:2 Lat:2 SizeLat:2 for: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:2 SizeLat:2 for: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:2 Lat:2 SizeLat:2 for: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:17 CodeSize:2 Lat:2 SizeLat:2 for: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:33 CodeSize:2 Lat:2 SizeLat:2 for: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:65 CodeSize:2 Lat:2 SizeLat:2 for: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:129 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; NEON-LABEL: 'reduce_i1'
+; NEON-NEXT:  Cost Model: Found costs of 3 for: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of 2 for: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of 2 for: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of 2 for: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of 2 for: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:97 CodeSize:2 Lat:2 SizeLat:2 for: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:193 CodeSize:2 Lat:2 SizeLat:2 for: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:385 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; MVE-LABEL: 'reduce_i1'
+; MVE-NEXT:  Cost Model: Found costs of 4 for: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:2 Lat:2 SizeLat:2 for: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:2 Lat:2 SizeLat:2 for: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:2 Lat:2 SizeLat:2 for: %V8 = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:2 Lat:2 SizeLat:2 for: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:257 CodeSize:2 Lat:2 SizeLat:2 for: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:513 CodeSize:2 Lat:2 SizeLat:2 for: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:1025 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
   %V2   = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
@@ -25,12 +49,3 @@ define i32 @reduce_i1(i32 %arg) {
   %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
   ret i32 undef
 }
-
-declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>)
-declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
-declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
-declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
-declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>)
-declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>)
-declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>)
-declare i1 @llvm.vector.reduce.and.v128i1(<128 x i1>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll b/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
index b38660df59a3e..3c13718874e78 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
@@ -1,43 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
-; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI
+; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
+; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI
 
 define void @and() {
 ; CHECK-V8-LABEL: 'and'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 2 for: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 4 for: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 5 for: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 7 for: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 8 for: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 9 for: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 7 for: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 15 for: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 16 for: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 17 for: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'and'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 1 for: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 3 for: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 7 for: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:8 Lat:9 SizeLat:9 for: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 3 for: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 15 for: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:17 CodeSize:16 Lat:17 SizeLat:17 for: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 3 for: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 7 for: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:11 SizeLat:11 for: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 31 for: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:33 CodeSize:32 Lat:33 SizeLat:33 for: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 entry:
   %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
@@ -60,40 +60,40 @@ entry:
 
 define void @or() {
 ; CHECK-V8-LABEL: 'or'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 2 for: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 4 for: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 5 for: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 7 for: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 8 for: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 9 for: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 7 for: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 15 for: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 16 for: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 17 for: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'or'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 1 for: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 3 for: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 7 for: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:8 Lat:9 SizeLat:9 for: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 3 for: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 15 for: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:17 CodeSize:16 Lat:17 SizeLat:17 for: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 3 for: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 7 for: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:11 SizeLat:11 for: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 31 for: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:33 CodeSize:32 Lat:33 SizeLat:33 for: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 entry:
   %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
@@ -116,40 +116,40 @@ entry:
 
 define void @xor() {
 ; CHECK-V8-LABEL: 'xor'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 2 for: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 4 for: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 5 for: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 7 for: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 8 for: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 9 for: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 7 for: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 15 for: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 16 for: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 17 for: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'xor'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 1 for: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 3 for: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 7 for: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:8 Lat:9 SizeLat:9 for: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 3 for: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 15 for: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:17 CodeSize:16 Lat:17 SizeLat:17 for: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 3 for: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 7 for: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:9 Lat:11 SizeLat:11 for: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 31 for: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:33 CodeSize:32 Lat:33 SizeLat:33 for: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 entry:
   %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
@@ -169,49 +169,3 @@ entry:
   %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
   ret void
 }
-
-declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
-declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
-declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
-declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
-declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
-declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>)
-declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
-declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>)
-declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
-declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>)
-declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
-declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
-declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
-declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
-declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll
index 48edae8c7d137..2bb890eee01ac 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll
@@ -1,49 +1,49 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
-; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEFP
-; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI
+; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
+; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEFP
+; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @fmin_strict() {
 ; CHECK-V8-LABEL: 'fmin_strict'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of 7 for: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 19 for: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 43 for: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 91 for: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 2 for: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 9 for: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:67 Lat:94 SizeLat:94 for: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'fmin_strict'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 5 for: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:6 Lat:9 SizeLat:9 for: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:5 SizeLat:5 for: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:69 CodeSize:25 Lat:69 SizeLat:69 for: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:118 CodeSize:67 Lat:118 SizeLat:118 for: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'fmin_strict'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 409 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 545 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:137 CodeSize:49 Lat:137 SizeLat:137 for: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:409 CodeSize:145 Lat:409 SizeLat:409 for: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:545 CodeSize:193 Lat:545 SizeLat:545 for: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:137 CodeSize:49 Lat:137 SizeLat:137 for: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:205 CodeSize:73 Lat:205 SizeLat:205 for: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:69 CodeSize:25 Lat:69 SizeLat:69 for: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:118 CodeSize:67 Lat:118 SizeLat:118 for: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
   %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
@@ -61,43 +61,43 @@ define void @fmin_strict() {
 
 define void @fmin_unordered() {
 ; CHECK-V8-LABEL: 'fmin_unordered'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of 7 for: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 19 for: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 43 for: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 91 for: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 2 for: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 9 for: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:67 Lat:94 SizeLat:94 for: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'fmin_unordered'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 5 for: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:6 Lat:9 SizeLat:9 for: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:5 SizeLat:5 for: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:69 CodeSize:25 Lat:69 SizeLat:69 for: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:118 CodeSize:67 Lat:118 SizeLat:118 for: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'fmin_unordered'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 409 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 545 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:137 CodeSize:49 Lat:137 SizeLat:137 for: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:409 CodeSize:145 Lat:409 SizeLat:409 for: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:545 CodeSize:193 Lat:545 SizeLat:545 for: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:137 CodeSize:49 Lat:137 SizeLat:137 for: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:205 CodeSize:73 Lat:205 SizeLat:205 for: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:69 CodeSize:25 Lat:69 SizeLat:69 for: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:118 CodeSize:67 Lat:118 SizeLat:118 for: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
   %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
@@ -114,43 +114,43 @@ define void @fmin_unordered() {
 
 define void @fmax_strict() {
 ; CHECK-V8-LABEL: 'fmax_strict'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of 7 for: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 19 for: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 43 for: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 91 for: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 2 for: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 9 for: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:67 Lat:94 SizeLat:94 for: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'fmax_strict'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 5 for: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:6 Lat:9 SizeLat:9 for: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:5 SizeLat:5 for: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:69 CodeSize:25 Lat:69 SizeLat:69 for: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:118 CodeSize:67 Lat:118 SizeLat:118 for: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'fmax_strict'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 409 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 545 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:137 CodeSize:49 Lat:137 SizeLat:137 for: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:409 CodeSize:145 Lat:409 SizeLat:409 for: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:545 CodeSize:193 Lat:545 SizeLat:545 for: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:137 CodeSize:49 Lat:137 SizeLat:137 for: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:205 CodeSize:73 Lat:205 SizeLat:205 for: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:69 CodeSize:25 Lat:69 SizeLat:69 for: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:118 CodeSize:67 Lat:118 SizeLat:118 for: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
   %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
@@ -168,43 +168,43 @@ define void @fmax_strict() {
 
 define void @fmax_unordered() {
 ; CHECK-V8-LABEL: 'fmax_unordered'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of 7 for: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 19 for: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 43 for: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 91 for: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 2 for: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 3 for: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 1 for: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of 9 for: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:94 CodeSize:67 Lat:94 SizeLat:94 for: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'fmax_unordered'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 5 for: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:7 SizeLat:7 for: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:6 Lat:9 SizeLat:9 for: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 1 for: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:4 Lat:5 SizeLat:5 for: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:69 CodeSize:25 Lat:69 SizeLat:69 for: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:118 CodeSize:67 Lat:118 SizeLat:118 for: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'fmax_unordered'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 409 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 545 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:137 CodeSize:49 Lat:137 SizeLat:137 for: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:409 CodeSize:145 Lat:409 SizeLat:409 for: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:545 CodeSize:193 Lat:545 SizeLat:545 for: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:137 CodeSize:49 Lat:137 SizeLat:137 for: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:205 CodeSize:73 Lat:205 SizeLat:205 for: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:13 Lat:35 SizeLat:35 for: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:69 CodeSize:25 Lat:69 SizeLat:69 for: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:118 CodeSize:67 Lat:118 SizeLat:118 for: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
   %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
@@ -218,27 +218,3 @@ define void @fmax_unordered() {
   %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
   ret void
 }
-
-
-declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
-declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
-declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
-declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
-declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
-declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
-declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
-declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
-declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>)
-
-
-declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
-declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
-declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
-declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
-declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
-declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
-declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
-declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
-declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
-declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-fp.ll b/llvm/test/Analysis/CostModel/ARM/reduce-fp.ll
index 87de486eeb183..392ef5e694883 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-fp.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-fp.ll
@@ -1,49 +1,49 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
-; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEFP
-; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI
+; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
+; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEFP
+; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @fadd_strict() {
 ; CHECK-V8-LABEL: 'fadd_strict'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:10 SizeLat:6 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:12 Lat:20 SizeLat:12 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:24 Lat:40 SizeLat:24 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:48 Lat:80 SizeLat:48 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:24 SizeLat:8 for: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:20 Lat:28 SizeLat:20 for: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'fadd_strict'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 6 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 12 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 24 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 8 for: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 8 for: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 32 for: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'fadd_strict'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 16 for: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 32 for: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 16 for: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 32 for: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
   %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
@@ -61,43 +61,43 @@ define void @fadd_strict() {
 
 define void @fadd_unordered() {
 ; CHECK-V8-LABEL: 'fadd_unordered'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:7 Lat:9 SizeLat:7 for: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:16 for: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:33 Lat:39 SizeLat:33 for: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:92 CodeSize:66 Lat:74 SizeLat:66 for: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:9 SizeLat:3 for: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:3 Lat:9 SizeLat:3 for: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:52 CodeSize:30 Lat:34 SizeLat:30 for: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'fadd_unordered'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 6 for: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:8 for: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:40 Lat:76 SizeLat:76 for: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'fadd_unordered'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 169 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 225 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 85 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:57 CodeSize:41 Lat:57 SizeLat:57 for: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:169 CodeSize:121 Lat:169 SizeLat:169 for: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:225 CodeSize:161 Lat:225 SizeLat:225 for: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:57 CodeSize:41 Lat:57 SizeLat:57 for: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:85 CodeSize:61 Lat:85 SizeLat:85 for: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:40 Lat:76 SizeLat:76 for: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
   %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
@@ -114,43 +114,43 @@ define void @fadd_unordered() {
 
 define void @fmul_strict() {
 ; CHECK-V8-LABEL: 'fmul_strict'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:10 SizeLat:6 for: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:12 Lat:20 SizeLat:12 for: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:24 Lat:40 SizeLat:24 for: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:48 Lat:80 SizeLat:48 for: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:24 SizeLat:8 for: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:20 Lat:28 SizeLat:20 for: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'fmul_strict'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 6 for: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 12 for: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 24 for: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 8 for: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 8 for: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 32 for: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'fmul_strict'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 16 for: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 32 for: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 16 for: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 4 for: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 8 for: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of 32 for: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
   %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
@@ -168,43 +168,43 @@ define void @fmul_strict() {
 
 define void @fmul_unordered() {
 ; CHECK-V8-LABEL: 'fmul_unordered'
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:7 Lat:9 SizeLat:7 for: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:20 SizeLat:16 for: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:33 Lat:39 SizeLat:33 for: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:92 CodeSize:66 Lat:74 SizeLat:66 for: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:9 SizeLat:3 for: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:12 SizeLat:4 for: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:6 SizeLat:2 for: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:3 Lat:9 SizeLat:3 for: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:52 CodeSize:30 Lat:34 SizeLat:30 for: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'fmul_unordered'
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 3 for: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 6 for: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:8 for: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 2 for: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of 4 for: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:6 SizeLat:6 for: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:40 Lat:76 SizeLat:76 for: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-MVEI-LABEL: 'fmul_unordered'
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 169 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 225 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 85 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
-; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:57 CodeSize:41 Lat:57 SizeLat:57 for: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:169 CodeSize:121 Lat:169 SizeLat:169 for: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:225 CodeSize:161 Lat:225 SizeLat:225 for: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:57 CodeSize:41 Lat:57 SizeLat:57 for: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:85 CodeSize:61 Lat:85 SizeLat:85 for: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:40 Lat:76 SizeLat:76 for: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
   %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
@@ -218,27 +218,3 @@ define void @fmul_unordered() {
   %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
   ret void
 }
-
-
-declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
-declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
-declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
-declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
-declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
-declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
-declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
-declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
-declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
-declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)
-
-
-declare half @llvm.vector.reduce.fmul.v2f16(half, <2 x half>)
-declare half @llvm.vector.reduce.fmul.v4f16(half, <4 x half>)
-declare half @llvm.vector.reduce.fmul.v8f16(half, <8 x half>)
-declare half @llvm.vector.reduce.fmul.v16f16(half, <16 x half>)
-declare float @llvm.vector.reduce.fmul.v2f32(float, <2 x float>)
-declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>)
-declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>)
-declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>)
-declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
-declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-or.ll b/llvm/test/Analysis/CostModel/ARM/reduce-or.ll
index a6b60103c0253..05df62d2131b0 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-or.ll
@@ -1,19 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @reduce_i1(i32 %arg) {
-; CHECK-LABEL: 'reduce_i1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 97 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 193 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 385 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; V8M-LABEL: 'reduce_i1'
+; V8M-NEXT:  Cost Model: Found costs of 1 for: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:2 Lat:2 SizeLat:2 for: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:2 Lat:2 SizeLat:2 for: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:2 Lat:2 SizeLat:2 for: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:17 CodeSize:2 Lat:2 SizeLat:2 for: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:33 CodeSize:2 Lat:2 SizeLat:2 for: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:65 CodeSize:2 Lat:2 SizeLat:2 for: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:129 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
+;
+; NEON-LABEL: 'reduce_i1'
+; NEON-NEXT:  Cost Model: Found costs of 3 for: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of 2 for: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of 2 for: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of 2 for: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of 2 for: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:97 CodeSize:2 Lat:2 SizeLat:2 for: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:193 CodeSize:2 Lat:2 SizeLat:2 for: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:385 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+;
+; MVE-LABEL: 'reduce_i1'
+; MVE-NEXT:  Cost Model: Found costs of 4 for: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:2 Lat:2 SizeLat:2 for: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:2 Lat:2 SizeLat:2 for: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:2 Lat:2 SizeLat:2 for: %V8 = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:2 Lat:2 SizeLat:2 for: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:257 CodeSize:2 Lat:2 SizeLat:2 for: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:513 CodeSize:2 Lat:2 SizeLat:2 for: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:1025 CodeSize:2 Lat:2 SizeLat:2 for: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
   %V2   = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
@@ -25,12 +49,3 @@ define i32 @reduce_i1(i32 %arg) {
   %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
   ret i32 undef
 }
-
-declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>)
-declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
-declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
-declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>)
-declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1>)
-declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>)
-declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>)
-declare i1 @llvm.vector.reduce.or.v128i1(<128 x i1>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll
index 7dcab51e0a1cf..861d825b3ecdb 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll
@@ -1,34 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @reduce_i64(i32 %arg) {
 ; V8M-LABEL: 'reduce_i64'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of 2 for: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:14 Lat:13 SizeLat:13 for: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:38 Lat:35 SizeLat:35 for: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:79 CodeSize:86 Lat:79 SizeLat:79 for: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:167 CodeSize:182 Lat:167 SizeLat:167 for: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i64'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 3 for: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 17 for: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 31 for: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 76 for: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 178 for: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i64'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of 8 for: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:110 CodeSize:59 Lat:110 SizeLat:110 for: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:212 CodeSize:110 Lat:212 SizeLat:212 for: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:416 CodeSize:212 Lat:416 SizeLat:416 for: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:824 CodeSize:416 Lat:824 SizeLat:824 for: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V1  = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef)
   %V2  = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
@@ -40,28 +40,28 @@ define i32 @reduce_i64(i32 %arg) {
 
 define i32 @reduce_i32(i32 %arg) {
 ; V8M-LABEL: 'reduce_i32'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i32'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i32'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2  = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
   %V4  = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
@@ -73,31 +73,31 @@ define i32 @reduce_i32(i32 %arg) {
 
 define i32 @reduce_i16(i32 %arg) {
 ; V8M-LABEL: 'reduce_i16'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:379 CodeSize:442 Lat:379 SizeLat:379 for: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i16'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i16'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2  = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
   %V4  = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
@@ -110,34 +110,34 @@ define i32 @reduce_i16(i32 %arg) {
 
 define i32 @reduce_i8(i32 %arg) {
 ; V8M-LABEL: 'reduce_i8'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:379 CodeSize:442 Lat:379 SizeLat:379 for: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:763 CodeSize:890 Lat:763 SizeLat:763 for: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i8'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 150 for: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i8'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2   = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef)
   %V4   = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
@@ -148,30 +148,3 @@ define i32 @reduce_i8(i32 %arg) {
   %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef)
   ret i32 undef
 }
-
-declare i64 @llvm.vector.reduce.smax.v1i64(<1 x i64>)
-declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
-declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>)
-declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>)
-
-declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
-declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>)
-
-declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
-declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>)
-declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>)
-
-declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
-declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
-declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll
index 617c6b4605189..d8fcc7bca9155 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll
@@ -1,34 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @reduce_i64(i32 %arg) {
 ; V8M-LABEL: 'reduce_i64'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of 2 for: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:14 Lat:13 SizeLat:13 for: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:38 Lat:35 SizeLat:35 for: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:79 CodeSize:86 Lat:79 SizeLat:79 for: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:167 CodeSize:182 Lat:167 SizeLat:167 for: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i64'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 3 for: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 17 for: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 31 for: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 76 for: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 178 for: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i64'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of 8 for: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:110 CodeSize:59 Lat:110 SizeLat:110 for: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:212 CodeSize:110 Lat:212 SizeLat:212 for: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:416 CodeSize:212 Lat:416 SizeLat:416 for: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:824 CodeSize:416 Lat:824 SizeLat:824 for: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V1  = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef)
   %V2  = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
@@ -40,28 +40,28 @@ define i32 @reduce_i64(i32 %arg) {
 
 define i32 @reduce_i32(i32 %arg) {
 ; V8M-LABEL: 'reduce_i32'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i32'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i32'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2  = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
   %V4  = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
@@ -73,31 +73,31 @@ define i32 @reduce_i32(i32 %arg) {
 
 define i32 @reduce_i16(i32 %arg) {
 ; V8M-LABEL: 'reduce_i16'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:379 CodeSize:442 Lat:379 SizeLat:379 for: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i16'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i16'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2  = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
   %V4  = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
@@ -110,34 +110,34 @@ define i32 @reduce_i16(i32 %arg) {
 
 define i32 @reduce_i8(i32 %arg) {
 ; V8M-LABEL: 'reduce_i8'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:379 CodeSize:442 Lat:379 SizeLat:379 for: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:763 CodeSize:890 Lat:763 SizeLat:763 for: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i8'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 150 for: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i8'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2   = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef)
   %V4   = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
@@ -148,30 +148,3 @@ define i32 @reduce_i8(i32 %arg) {
   %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef)
   ret i32 undef
 }
-
-declare i64 @llvm.vector.reduce.smin.v1i64(<1 x i64>)
-declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
-declare i64 @llvm.vector.reduce.smin.v8i64(<8 x i64>)
-declare i64 @llvm.vector.reduce.smin.v16i64(<16 x i64>)
-
-declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
-declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>)
-
-declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
-declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>)
-declare i16 @llvm.vector.reduce.smin.v64i16(<64 x i16>)
-
-declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
-declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>)
-declare i8 @llvm.vector.reduce.smin.v128i8(<128 x i8>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll
index 764034d18bee0..b95808a1bb4b5 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll
@@ -1,34 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @reduce_i64(i32 %arg) {
 ; V8M-LABEL: 'reduce_i64'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of 2 for: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:14 Lat:13 SizeLat:13 for: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:38 Lat:35 SizeLat:35 for: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:79 CodeSize:86 Lat:79 SizeLat:79 for: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:167 CodeSize:182 Lat:167 SizeLat:167 for: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i64'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 3 for: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 17 for: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 31 for: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 76 for: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 178 for: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i64'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of 8 for: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:110 CodeSize:59 Lat:110 SizeLat:110 for: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:212 CodeSize:110 Lat:212 SizeLat:212 for: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:416 CodeSize:212 Lat:416 SizeLat:416 for: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:824 CodeSize:416 Lat:824 SizeLat:824 for: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V1  = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
   %V2  = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
@@ -40,28 +40,28 @@ define i32 @reduce_i64(i32 %arg) {
 
 define i32 @reduce_i32(i32 %arg) {
 ; V8M-LABEL: 'reduce_i32'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i32'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i32'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2  = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
   %V4  = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
@@ -73,31 +73,31 @@ define i32 @reduce_i32(i32 %arg) {
 
 define i32 @reduce_i16(i32 %arg) {
 ; V8M-LABEL: 'reduce_i16'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:379 CodeSize:442 Lat:379 SizeLat:379 for: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i16'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i16'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2  = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
   %V4  = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
@@ -110,34 +110,34 @@ define i32 @reduce_i16(i32 %arg) {
 
 define i32 @reduce_i8(i32 %arg) {
 ; V8M-LABEL: 'reduce_i8'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:379 CodeSize:442 Lat:379 SizeLat:379 for: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:763 CodeSize:890 Lat:763 SizeLat:763 for: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i8'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 150 for: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i8'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2   = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
   %V4   = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
@@ -148,30 +148,3 @@ define i32 @reduce_i8(i32 %arg) {
   %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
   ret i32 undef
 }
-
-declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64>)
-declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
-declare i64 @llvm.vector.reduce.umax.v8i64(<8 x i64>)
-declare i64 @llvm.vector.reduce.umax.v16i64(<16 x i64>)
-
-declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
-declare i32 @llvm.vector.reduce.umax.v32i32(<32 x i32>)
-
-declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
-declare i16 @llvm.vector.reduce.umax.v32i16(<32 x i16>)
-declare i16 @llvm.vector.reduce.umax.v64i16(<64 x i16>)
-
-declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
-declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>)
-declare i8 @llvm.vector.reduce.umax.v128i8(<128 x i8>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll
index b5431f63bdca9..055606b0f8617 100644
--- a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll
@@ -1,34 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define i32 @reduce_i64(i32 %arg) {
 ; V8M-LABEL: 'reduce_i64'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of 2 for: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:14 Lat:13 SizeLat:13 for: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:38 Lat:35 SizeLat:35 for: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:79 CodeSize:86 Lat:79 SizeLat:79 for: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:167 CodeSize:182 Lat:167 SizeLat:167 for: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i64'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 3 for: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 17 for: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 31 for: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 76 for: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of 178 for: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i64'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of 8 for: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:110 CodeSize:59 Lat:110 SizeLat:110 for: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:212 CodeSize:110 Lat:212 SizeLat:212 for: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:416 CodeSize:212 Lat:416 SizeLat:416 for: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:824 CodeSize:416 Lat:824 SizeLat:824 for: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V1  = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
   %V2  = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
@@ -40,28 +40,28 @@ define i32 @reduce_i64(i32 %arg) {
 
 define i32 @reduce_i32(i32 %arg) {
 ; V8M-LABEL: 'reduce_i32'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i32'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i32'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2  = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
   %V4  = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
@@ -73,31 +73,31 @@ define i32 @reduce_i32(i32 %arg) {
 
 define i32 @reduce_i16(i32 %arg) {
 ; V8M-LABEL: 'reduce_i16'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:379 CodeSize:442 Lat:379 SizeLat:379 for: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i16'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i16'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2  = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
   %V4  = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
@@ -110,34 +110,34 @@ define i32 @reduce_i16(i32 %arg) {
 
 define i32 @reduce_i8(i32 %arg) {
 ; V8M-LABEL: 'reduce_i8'
-; V8M-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
-; V8M-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:379 CodeSize:442 Lat:379 SizeLat:379 for: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of RThru:763 CodeSize:890 Lat:763 SizeLat:763 for: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
 ;
 ; NEON-LABEL: 'reduce_i8'
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
-; NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of 150 for: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; MVE-LABEL: 'reduce_i8'
-; MVE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
-; MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
+; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %V2   = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
   %V4   = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
@@ -148,30 +148,3 @@ define i32 @reduce_i8(i32 %arg) {
   %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
   ret i32 undef
 }
-
-declare i64 @llvm.vector.reduce.umin.v1i64(<1 x i64>)
-declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
-declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
-declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>)
-declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>)
-
-declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
-declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
-declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
-declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
-declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>)
-
-declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
-declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
-declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
-declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
-declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>)
-declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>)
-
-declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
-declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
-declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
-declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
-declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
-declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
-declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)