[llvm] 38986c6 - [AArch64] Add worst case shuffle costs

Fri Jul 23 01:02:12 PDT 2021

Author: David Green
Date: 2021-07-23T09:01:58+01:00
New Revision: 38986c678285063f0dc1bd5752593bfdbe5068a2

URL: https://github.com/llvm/llvm-project/commit/38986c678285063f0dc1bd5752593bfdbe5068a2
DIFF: https://github.com/llvm/llvm-project/commit/38986c678285063f0dc1bd5752593bfdbe5068a2.diff

LOG: [AArch64] Add worst case shuffle costs

This adds some missing single source shuffle costs for AArch64, of i16
and i8 vectors. v4i16 are the same as v4i32 with a worse case cost of 3
coming from the perfect shuffle tables. The larger vector sizes expand
into a constant pool, plus a load (and adrp) and a tbl. I arbitrarily
chose 8 for the cost to be expensive but not too expensive.

Differential Revision: https://reviews.llvm.org/D106241

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
    llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
    llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 1d2d49b287d43..bb15e962e73d0 100644

--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2124,13 +2124,20 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
       { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
       // PermuteSingleSrc shuffle kinds.
-      // TODO: handle vXi8/vXi16.
       { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
       { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
       { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
       { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
       { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
       { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
+      { TTI::SK_PermuteSingleSrc, MVT::v4i16, 3 }, // perfectshuffle worst case.
+      { TTI::SK_PermuteSingleSrc, MVT::v4f16, 3 }, // perfectshuffle worst case.
+      { TTI::SK_PermuteSingleSrc, MVT::v4bf16, 3 }, // perfectshuffle worst case.
+      { TTI::SK_PermuteSingleSrc, MVT::v8i16, 8 }, // constpool + load + tbl
+      { TTI::SK_PermuteSingleSrc, MVT::v8f16, 8 }, // constpool + load + tbl
+      { TTI::SK_PermuteSingleSrc, MVT::v8bf16, 8 }, // constpool + load + tbl
+      { TTI::SK_PermuteSingleSrc, MVT::v8i8, 8 }, // constpool + load + tbl
+      { TTI::SK_PermuteSingleSrc, MVT::v16i8, 8 }, // constpool + load + tbl
       // Reverse can be lowered with `rev`.
       { TTI::SK_Reverse, MVT::v2i32, 1 }, // mov.
       { TTI::SK_Reverse, MVT::v4i32, 2 }, // REV64; EXT

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
index 07f5b7fa0d54e..defbe964b2afc 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
@@ -12,7 +12,7 @@ define void @reduce() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 181 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 362 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
index a8c75a9c92221..0f1bddd7325cd 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
@@ -4,16 +4,16 @@
 define void @reduce_umin() {
 ; CHECK-LABEL: 'reduce_umin'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
@@ -43,16 +43,16 @@ define void @reduce_umin() {
 define void @reduce_umax() {
 ; CHECK-LABEL: 'reduce_umax'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
@@ -82,16 +82,16 @@ define void @reduce_umax() {
 define void @reduce_smin() {
 ; CHECK-LABEL: 'reduce_smin'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
@@ -121,16 +121,16 @@ define void @reduce_smin() {
 define void @reduce_smax() {
 ; CHECK-LABEL: 'reduce_smax'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 432 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 736 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 186 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
@@ -159,10 +159,10 @@ define void @reduce_smax() {
 
 define void @reduce_fmin() {
 ; CHECK-LABEL: 'reduce_fmin'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 300 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 400 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
@@ -184,10 +184,10 @@ define void @reduce_fmin() {
 
 define void @reduce_fmax() {
 ; CHECK-LABEL: 'reduce_fmax'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 300 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 400 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
index 1492fa4364bc6..e30b80414c691 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
@@ -12,7 +12,7 @@ define void @reduce() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 181 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 362 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
index ad8fdb5e0a736..bc5033388dffa 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
@@ -5,14 +5,14 @@ define void @reduce() {
 ; CHECK-LABEL: 'reduce'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 364 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 455 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 637 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1001 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 309 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 673 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V3i8 = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V3i8 = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
index 646406ed5114e..854ee4fdfcc2d 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
@@ -4,12 +4,12 @@
 define void @shuffle() {
 ; CHECK-LABEL: 'shuffle'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>

diff  --git a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
index 4aef33c632dd4..ce743526ee565 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
@@ -48,7 +48,7 @@ define i32 @add.i32.v4i32(<4 x i32> %v) {
 
 define i8 @umin.i8.v8i8(<8 x i8> %v) {
 ; COST-LABEL: 'umin.i8.v8i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
@@ -57,7 +57,7 @@ define i8 @umin.i8.v8i8(<8 x i8> %v) {
 
 define i8 @umin.i8.v16i8(<16 x i8> %v) {
 ; COST-LABEL: 'umin.i8.v16i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
@@ -66,7 +66,7 @@ define i8 @umin.i8.v16i8(<16 x i8> %v) {
 
 define i16 @umin.i16.v4i16(<4 x i16> %v) {
 ; COST-LABEL: 'umin.i16.v4i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
@@ -75,7 +75,7 @@ define i16 @umin.i16.v4i16(<4 x i16> %v) {
 
 define i16 @umin.i16.v8i16(<8 x i16> %v) {
 ; COST-LABEL: 'umin.i16.v8i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
@@ -93,7 +93,7 @@ define i32 @umin.i32.v4i32(<4 x i32> %v) {
 
 define i8 @umax.i8.v8i8(<8 x i8> %v) {
 ; COST-LABEL: 'umax.i8.v8i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
@@ -102,7 +102,7 @@ define i8 @umax.i8.v8i8(<8 x i8> %v) {
 
 define i8 @umax.i8.v16i8(<16 x i8> %v) {
 ; COST-LABEL: 'umax.i8.v16i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
@@ -111,7 +111,7 @@ define i8 @umax.i8.v16i8(<16 x i8> %v) {
 
 define i16 @umax.i16.v4i16(<4 x i16> %v) {
 ; COST-LABEL: 'umax.i16.v4i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
@@ -120,7 +120,7 @@ define i16 @umax.i16.v4i16(<4 x i16> %v) {
 
 define i16 @umax.i16.v8i16(<8 x i16> %v) {
 ; COST-LABEL: 'umax.i16.v8i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
@@ -138,7 +138,7 @@ define i32 @umax.i32.v4i32(<4 x i32> %v) {
 
 define i8 @smin.i8.v8i8(<8 x i8> %v) {
 ; COST-LABEL: 'smin.i8.v8i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
@@ -147,7 +147,7 @@ define i8 @smin.i8.v8i8(<8 x i8> %v) {
 
 define i8 @smin.i8.v16i8(<16 x i8> %v) {
 ; COST-LABEL: 'smin.i8.v16i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
@@ -156,7 +156,7 @@ define i8 @smin.i8.v16i8(<16 x i8> %v) {
 
 define i16 @smin.i16.v4i16(<4 x i16> %v) {
 ; COST-LABEL: 'smin.i16.v4i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
@@ -165,7 +165,7 @@ define i16 @smin.i16.v4i16(<4 x i16> %v) {
 
 define i16 @smin.i16.v8i16(<8 x i16> %v) {
 ; COST-LABEL: 'smin.i16.v8i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
@@ -183,7 +183,7 @@ define i32 @smin.i32.v4i32(<4 x i32> %v) {
 
 define i8 @smax.i8.v8i8(<8 x i8> %v) {
 ; COST-LABEL: 'smax.i8.v8i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
@@ -192,7 +192,7 @@ define i8 @smax.i8.v8i8(<8 x i8> %v) {
 
 define i8 @smax.i8.v16i8(<16 x i8> %v) {
 ; COST-LABEL: 'smax.i8.v16i8'
-; COST-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
 ;
   %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
@@ -201,7 +201,7 @@ define i8 @smax.i8.v16i8(<16 x i8> %v) {
 
 define i16 @smax.i16.v4i16(<4 x i16> %v) {
 ; COST-LABEL: 'smax.i16.v4i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
@@ -210,7 +210,7 @@ define i16 @smax.i16.v4i16(<4 x i16> %v) {
 
 define i16 @smax.i16.v8i16(<8 x i16> %v) {
 ; COST-LABEL: 'smax.i16.v8i16'
-; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
 ; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
 ;
   %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)