[llvm] a92974b - [AArch64] Add and update reduction and shuffle costs. NFC

Thu Jul 22 02:22:52 PDT 2021

Author: David Green
Date: 2021-07-22T10:22:42+01:00
New Revision: a92974bfdf4fe3a3ac5e284ed3db174291bcbcba

URL: https://github.com/llvm/llvm-project/commit/a92974bfdf4fe3a3ac5e284ed3db174291bcbcba
DIFF: https://github.com/llvm/llvm-project/commit/a92974bfdf4fe3a3ac5e284ed3db174291bcbcba.diff

LOG: [AArch64] Add and update reduction and shuffle costs. NFC

Added: 
    llvm/test/Analysis/CostModel/AArch64/reduce-add.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
    llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll

Modified: 
    llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
    llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
    llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-add.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-add.ll
new file mode 100644
index 0000000000000..8bbb96f20dc9a

--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-add.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s
+
+define void @reduce() {
+; CHECK-LABEL: 'reduce'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V1i8 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
+  %V3i8 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> undef)
+  %V4i8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
+  %V8i8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
+  %V16i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
+  %V32i8 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
+  %V64i8 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
+  %V2i16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
+  %V4i16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
+  %V8i16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
+  %V16i16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
+  %V2i32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
+  %V4i32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
+  %V8i32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
+  %V2i64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
+  %V4i64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
+  ret void
+}
+
+declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.add.v3i8(<3 x i8>)
+declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
+declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
index 6d2fdaf573136..07f5b7fa0d54e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s
 
-define i32 @reduce_i1(i32 %arg) {
-; CHECK-LABEL: 'reduce_i1'
+define void @reduce() {
+; CHECK-LABEL: 'reduce'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef)
@@ -26,7 +26,7 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 
   %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
@@ -53,7 +53,7 @@ define i32 @reduce_i1(i32 %arg) {
   %V8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
   %V2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
   %V4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
-  ret i32 undef
+  ret void
 }
 
 declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
new file mode 100644
index 0000000000000..a8c75a9c92221
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
@@ -0,0 +1,296 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s
+
+define void @reduce_umin() {
+; CHECK-LABEL: 'reduce_umin'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef)
+  %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef)
+  %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
+  %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
+  %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
+  %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
+  %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
+  %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
+  %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
+  %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
+  %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
+  %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
+  %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
+  %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
+  %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
+  %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
+  ret void
+}
+
+define void @reduce_umax() {
+; CHECK-LABEL: 'reduce_umax'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef)
+  %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef)
+  %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
+  %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
+  %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
+  %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
+  %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
+  %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
+  %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
+  %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
+  %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
+  %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
+  %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
+  %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
+  %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
+  %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
+  ret void
+}
+
+define void @reduce_smin() {
+; CHECK-LABEL: 'reduce_smin'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef)
+  %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef)
+  %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef)
+  %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef)
+  %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef)
+  %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef)
+  %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef)
+  %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef)
+  %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef)
+  %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef)
+  %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef)
+  %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef)
+  %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef)
+  %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef)
+  %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef)
+  %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef)
+  ret void
+}
+
+define void @reduce_smax() {
+; CHECK-LABEL: 'reduce_smax'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef)
+  %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef)
+  %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef)
+  %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef)
+  %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef)
+  %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef)
+  %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef)
+  %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef)
+  %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef)
+  %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef)
+  %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef)
+  %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef)
+  %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef)
+  %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef)
+  %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef)
+  %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef)
+  ret void
+}
+
+define void @reduce_fmin() {
+; CHECK-LABEL: 'reduce_fmin'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 300 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 400 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+  %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+  %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+  %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+  %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+  %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+  %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+  %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+  %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+  ret void
+}
+
+define void @reduce_fmax() {
+; CHECK-LABEL: 'reduce_fmax'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 300 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 400 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+  %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+  %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+  %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+  %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+  %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+  %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+  %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+  %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+  ret void
+}
+
+declare i8 @llvm.vector.reduce.umin.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>)
+declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
+declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
+declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
+
+declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>)
+declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>)
+declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
+declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
+
+declare i8 @llvm.vector.reduce.smin.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>)
+declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>)
+declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
+declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
+
+declare i8 @llvm.vector.reduce.smax.v1i8(<1 x i8>)
+declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>)
+declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>)
+declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
+declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
+
+declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
+declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
+declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
+declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
+
+declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
+declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
+declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
+declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
+declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
index 82068c736533d..1492fa4364bc6 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s
 
-define i32 @reduce_i1(i32 %arg) {
-; CHECK-LABEL: 'reduce_i1'
+define void @reduce() {
+; CHECK-LABEL: 'reduce'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef)
@@ -26,7 +26,7 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 
   %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
@@ -53,7 +53,7 @@ define i32 @reduce_i1(i32 %arg) {
   %V8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
   %V2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
   %V4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
-  ret i32 undef
+  ret void
 }
 
 declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
index cebcc3aace493..ad8fdb5e0a736 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s
 
-define i32 @reduce_i1(i32 %arg) {
-; CHECK-LABEL: 'reduce_i1'
+define void @reduce() {
+; CHECK-LABEL: 'reduce'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
@@ -26,7 +26,7 @@ define i32 @reduce_i1(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 
   %V1   = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
@@ -53,7 +53,7 @@ define i32 @reduce_i1(i32 %arg) {
   %V8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
   %V2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
   %V4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
-  ret i32 undef
+  ret void
 }
 
 declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
new file mode 100644
index 0000000000000..646406ed5114e
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s
+
+define void @shuffle() {
+; CHECK-LABEL: 'shuffle'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+  %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+  %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+
+  %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+  %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+
+  %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+  %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+
+  %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+  %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
index 3ddf90f12c2ef..df25a6fedf2b8 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
@@ -1,245 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST
-; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
 
-; COST-LABEL: add.i8.v8i8
-; COST:       Found an estimated cost of 1 for instruction: %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v)
-; CODE-LABEL: add.i8.v8i8
-; CODE:       addv b0, v0.8b
 define i8 @add.i8.v8i8(<8 x i8> %v) {
+; COST-LABEL: 'add.i8.v8i8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
+;
   %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v)
   ret i8 %r
 }
 
-; COST-LABEL: add.i8.v16i8
-; COST:       Found an estimated cost of 1 for instruction: %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v)
-; CODE-LABEL: add.i8.v16i8
-; CODE:       addv b0, v0.16b
 define i8 @add.i8.v16i8(<16 x i8> %v) {
+; COST-LABEL: 'add.i8.v16i8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
+;
   %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v)
   ret i8 %r
 }
 
-; COST-LABEL: add.i16.v4i16
-; COST:       Found an estimated cost of 1 for instruction: %r = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v)
-; CODE-LABEL: add.i16.v4i16
-; CODE:       addv h0, v0.4h
 define i16 @add.i16.v4i16(<4 x i16> %v) {
+; COST-LABEL: 'add.i16.v4i16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
+;
   %r = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v)
   ret i16 %r
 }
 
-; COST-LABEL: add.i16.v8i16
-; COST:       Found an estimated cost of 1 for instruction: %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v)
-; CODE-LABEL: add.i16.v8i16
-; CODE:       addv h0, v0.8h
 define i16 @add.i16.v8i16(<8 x i16> %v) {
+; COST-LABEL: 'add.i16.v8i16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
+;
   %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v)
   ret i16 %r
 }
 
-; COST-LABEL: add.i32.v4i32
-; COST:       Found an estimated cost of 1 for instruction: %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
-; CODE-LABEL: add.i32.v4i32
-; CODE:       addv s0, v0.4s
 define i32 @add.i32.v4i32(<4 x i32> %v) {
+; COST-LABEL: 'add.i32.v4i32'
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+;
   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
   ret i32 %r
 }
 
-; COST-LABEL: umin.i8.v8i8
-; COST:       Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
-; CODE-LABEL: umin.i8.v8i8
-; CODE:       uminv b0, v0.8b
 define i8 @umin.i8.v8i8(<8 x i8> %v) {
+; COST-LABEL: 'umin.i8.v8i8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
+;
   %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
   ret i8 %r
 }
 
-; COST-LABEL: umin.i8.v16i8
-; COST:       Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
-; CODE-LABEL: umin.i8.v16i8
-; CODE:       uminv b0, v0.16b
 define i8 @umin.i8.v16i8(<16 x i8> %v) {
+; COST-LABEL: 'umin.i8.v16i8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
+;
   %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v)
   ret i8 %r
 }
 
-; COST-LABEL: umin.i16.v4i16
-; COST:       Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
-; CODE-LABEL: umin.i16.v4i16
-; CODE:       uminv h0, v0.4h
 define i16 @umin.i16.v4i16(<4 x i16> %v) {
+; COST-LABEL: 'umin.i16.v4i16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
+;
   %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
   ret i16 %r
 }
 
-; COST-LABEL: umin.i16.v8i16
-; COST:       Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
-; CODE-LABEL: umin.i16.v8i16
-; CODE:       uminv h0, v0.8h
 define i16 @umin.i16.v8i16(<8 x i16> %v) {
+; COST-LABEL: 'umin.i16.v8i16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
+;
   %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v)
   ret i16 %r
 }
 
-; COST-LABEL: umin.i32.v4i32
-; COST:       Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v)
-; CODE-LABEL: umin.i32.v4i32
-; CODE:       uminv s0, v0.4s
 define i32 @umin.i32.v4i32(<4 x i32> %v) {
+; COST-LABEL: 'umin.i32.v4i32'
+; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+;
   %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v)
   ret i32 %r
 }
 
-; COST-LABEL: umax.i8.v8i8
-; COST:       Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
-; CODE-LABEL: umax.i8.v8i8
-; CODE:       umaxv b0, v0.8b
 define i8 @umax.i8.v8i8(<8 x i8> %v) {
+; COST-LABEL: 'umax.i8.v8i8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
+;
   %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
   ret i8 %r
 }
 
-; COST-LABEL: umax.i8.v16i8
-; COST:       Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
-; CODE-LABEL: umax.i8.v16i8
-; CODE:       umaxv b0, v0.16b
 define i8 @umax.i8.v16i8(<16 x i8> %v) {
+; COST-LABEL: 'umax.i8.v16i8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
+;
   %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v)
   ret i8 %r
 }
 
-; COST-LABEL: umax.i16.v4i16
-; COST:       Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
-; CODE-LABEL: umax.i16.v4i16
-; CODE:       umaxv h0, v0.4h
 define i16 @umax.i16.v4i16(<4 x i16> %v) {
+; COST-LABEL: 'umax.i16.v4i16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
+;
   %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
   ret i16 %r
 }
 
-; COST-LABEL: umax.i16.v8i16
-; COST:       Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
-; CODE-LABEL: umax.i16.v8i16
-; CODE:       umaxv h0, v0.8h
 define i16 @umax.i16.v8i16(<8 x i16> %v) {
+; COST-LABEL: 'umax.i16.v8i16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
+;
   %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v)
   ret i16 %r
 }
 
-; COST-LABEL: umax.i32.v4i32
-; COST:       Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
-; CODE-LABEL: umax.i32.v4i32
-; CODE:       umaxv s0, v0.4s
 define i32 @umax.i32.v4i32(<4 x i32> %v) {
+; COST-LABEL: 'umax.i32.v4i32'
+; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+;
   %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
   ret i32 %r
 }
 
-; COST-LABEL: smin.i8.v8i8
-; COST:       Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
-; CODE-LABEL: smin.i8.v8i8
-; CODE:       sminv b0, v0.8b
 define i8 @smin.i8.v8i8(<8 x i8> %v) {
+; COST-LABEL: 'smin.i8.v8i8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
+;
   %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
   ret i8 %r
 }
 
-; COST-LABEL: smin.i8.v16i8
-; COST:       Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
-; CODE-LABEL: smin.i8.v16i8
-; CODE:       sminv b0, v0.16b
 define i8 @smin.i8.v16i8(<16 x i8> %v) {
+; COST-LABEL: 'smin.i8.v16i8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
+;
   %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v)
   ret i8 %r
 }
 
-; COST-LABEL: smin.i16.v4i16
-; COST:       Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
-; CODE-LABEL: smin.i16.v4i16
-; CODE:       sminv h0, v0.4h
 define i16 @smin.i16.v4i16(<4 x i16> %v) {
+; COST-LABEL: 'smin.i16.v4i16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
+;
   %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
   ret i16 %r
 }
 
-; COST-LABEL: smin.i16.v8i16
-; COST:       Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
-; CODE-LABEL: smin.i16.v8i16
-; CODE:       sminv h0, v0.8h
 define i16 @smin.i16.v8i16(<8 x i16> %v) {
+; COST-LABEL: 'smin.i16.v8i16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
+;
   %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v)
   ret i16 %r
 }
 
-; COST-LABEL: smin.i32.v4i32
-; COST:       Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)
-; CODE-LABEL: smin.i32.v4i32
-; CODE:       sminv s0, v0.4s
 define i32 @smin.i32.v4i32(<4 x i32> %v) {
+; COST-LABEL: 'smin.i32.v4i32'
+; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+;
   %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)
   ret i32 %r
 }
 
-; COST-LABEL: smax.i8.v8i8
-; COST:       Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
-; CODE-LABEL: smax.i8.v8i8
-; CODE:       smaxv b0, v0.8b
 define i8 @smax.i8.v8i8(<8 x i8> %v) {
+; COST-LABEL: 'smax.i8.v8i8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
+;
   %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
   ret i8 %r
 }
 
-; COST-LABEL: smax.i8.v16i8
-; COST:       Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
-; CODE-LABEL: smax.i8.v16i8
-; CODE:       smaxv b0, v0.16b
 define i8 @smax.i8.v16i8(<16 x i8> %v) {
+; COST-LABEL: 'smax.i8.v16i8'
+; COST-NEXT:  Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r
+;
   %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v)
   ret i8 %r
 }
 
-; COST-LABEL: smax.i16.v4i16
-; COST:       Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
-; CODE-LABEL: smax.i16.v4i16
-; CODE:       smaxv h0, v0.4h
 define i16 @smax.i16.v4i16(<4 x i16> %v) {
+; COST-LABEL: 'smax.i16.v4i16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
+;
   %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
   ret i16 %r
 }
 
-; COST-LABEL: smax.i16.v8i16
-; COST:       Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
-; CODE-LABEL: smax.i16.v8i16
-; CODE:       smaxv h0, v0.8h
 define i16 @smax.i16.v8i16(<8 x i16> %v) {
+; COST-LABEL: 'smax.i16.v8i16'
+; COST-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
+;
   %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v)
   ret i16 %r
 }
 
-; COST-LABEL: smax.i32.v4i32
-; COST:       Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
-; CODE-LABEL: smax.i32.v4i32
-; CODE:       smaxv s0, v0.4s
 define i32 @smax.i32.v4i32(<4 x i32> %v) {
+; COST-LABEL: 'smax.i32.v4i32'
+; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
+;
   %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
   ret i32 %r
 }
 
-; COST-LABEL: fmin.f32.v4f32
-; COST:       Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
-; CODE-LABEL: fmin.f32.v4f32
-; CODE:       fminnmv s0, v0.4s
 define float @fmin.f32.v4f32(<4 x float> %v) {
+; COST-LABEL: 'fmin.f32.v4f32'
+; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %r
+;
   %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
   ret float %r
 }
 
-; COST-LABEL: fmax.f32.v4f32
-; COST:       Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
-; CODE-LABEL: fmax.f32.v4f32
-; CODE:       fmaxnmv s0, v0.4s
 define float @fmax.f32.v4f32(<4 x float> %v) {
+; COST-LABEL: 'fmax.f32.v4f32'
+; COST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
+; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %r
+;
   %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
   ret float %r
 }