[llvm] 5afb161 - [ARM] Add various vector reduce costmodel tests. NFC

Mon Sep 4 02:51:03 PDT 2023

Author: David Green
Date: 2023-09-04T10:50:58+01:00
New Revision: 5afb161ed57d073c7af777afd792e8c14857d614

URL: https://github.com/llvm/llvm-project/commit/5afb161ed57d073c7af777afd792e8c14857d614
DIFF: https://github.com/llvm/llvm-project/commit/5afb161ed57d073c7af777afd792e8c14857d614.diff

LOG: [ARM] Add various vector reduce costmodel tests. NFC

See D159367 and the followups.

Added: 
    llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
    llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll
    llvm/test/Analysis/CostModel/ARM/reduce-fp.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll b/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
new file mode 100644
index 00000000000000..e0ea2ab10d2390

--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll
@@ -0,0 +1,217 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
+; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI
+
+define void @and() {
+; CHECK-V8-LABEL: 'and'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 199 for instruction: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 391 for instruction: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 488 for instruction: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'and'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 394 for instruction: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 524 for instruction: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 394 for instruction: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 1036 for instruction: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 1294 for instruction: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+  %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef)
+  %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef)
+  %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef)
+  %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef)
+  %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef)
+  %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef)
+  %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef)
+  %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef)
+  %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef)
+  %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef)
+  %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef)
+  %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef)
+  %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef)
+  %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef)
+  %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef)
+  ret void
+}
+
+define void @or() {
+; CHECK-V8-LABEL: 'or'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 199 for instruction: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 391 for instruction: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 488 for instruction: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'or'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 394 for instruction: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 524 for instruction: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 394 for instruction: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 1036 for instruction: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 1294 for instruction: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+  %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef)
+  %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef)
+  %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef)
+  %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef)
+  %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef)
+  %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef)
+  %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef)
+  %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef)
+  %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef)
+  %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef)
+  %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef)
+  %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef)
+  %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef)
+  %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef)
+  %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef)
+  ret void
+}
+
+define void @xor() {
+; CHECK-V8-LABEL: 'xor'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 199 for instruction: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 391 for instruction: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 488 for instruction: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'xor'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 202 for instruction: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 394 for instruction: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 524 for instruction: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 394 for instruction: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 1036 for instruction: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 1294 for instruction: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+  %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
+  %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
+  %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
+  %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
+  %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
+  %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
+  %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
+  %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
+  %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
+  %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
+  %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
+  %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
+  %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
+  %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
+  %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
+  ret void
+}
+
+declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>)
+declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
+declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
+declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>)
+declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
+declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>)
+declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
+declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
+declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
+declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>)
+declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll
new file mode 100644
index 00000000000000..14b27062eebb66
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
+; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEFP
+; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @fmin_strict() {
+; CHECK-V8-LABEL: 'fmin_strict'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'fmin_strict'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'fmin_strict'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 409 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 545 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+  %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+  %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+  %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+  %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+  %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+  %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+  %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+  %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+  %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+  ret void
+}
+
+
+define void @fmin_unordered() {
+; CHECK-V8-LABEL: 'fmin_unordered'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'fmin_unordered'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'fmin_unordered'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 409 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 545 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+  %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+  %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+  %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+  %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef)
+  %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef)
+  %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef)
+  %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef)
+  %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef)
+  %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef)
+  ret void
+}
+
+define void @fmax_strict() {
+; CHECK-V8-LABEL: 'fmax_strict'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'fmax_strict'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'fmax_strict'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 409 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 545 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+  %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+  %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+  %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+  %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+  %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+  %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+  %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+  %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+  %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+  ret void
+}
+
+
+define void @fmax_unordered() {
+; CHECK-V8-LABEL: 'fmax_unordered'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'fmax_unordered'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 133 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'fmax_unordered'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 409 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 545 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+  %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+  %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+  %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+  %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef)
+  %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef)
+  %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef)
+  %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef)
+  %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef)
+  %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef)
+  ret void
+}
+
+
+declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
+declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
+declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
+declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
+declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>)
+
+
+declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
+declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
+declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
+declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
+declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
+declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>)

diff  --git a/llvm/test/Analysis/CostModel/ARM/reduce-fp.ll b/llvm/test/Analysis/CostModel/ARM/reduce-fp.ll
new file mode 100644
index 00000000000000..26ee8155f17ba1
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/reduce-fp.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8
+; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEFP
+; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @fadd_strict() {
+; CHECK-V8-LABEL: 'fadd_strict'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'fadd_strict'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'fadd_strict'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+  %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
+  %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+  %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+  %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
+  %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
+  %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
+  %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
+  %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
+  %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+  ret void
+}
+
+
+define void @fadd_unordered() {
+; CHECK-V8-LABEL: 'fadd_unordered'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'fadd_unordered'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'fadd_unordered'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 169 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 225 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 85 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef)
+  %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef)
+  %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef)
+  %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef)
+  %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef)
+  %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
+  %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
+  %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
+  %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef)
+  %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
+  ret void
+}
+
+define void @fmul_strict() {
+; CHECK-V8-LABEL: 'fmul_strict'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'fmul_strict'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'fmul_strict'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
+  %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
+  %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
+  %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
+  %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
+  %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
+  %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
+  %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
+  %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
+  %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+  ret void
+}
+
+
+define void @fmul_unordered() {
+; CHECK-V8-LABEL: 'fmul_unordered'
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-V8-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'fmul_unordered'
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 137 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEFP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEI-LABEL: 'fmul_unordered'
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 169 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 225 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 85 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+; CHECK-MVEI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
+  %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
+  %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
+  %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
+  %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
+  %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
+  %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
+  %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
+  %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
+  %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
+  ret void
+}
+
+
+declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
+declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
+declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
+declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
+declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
+declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
+declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>)
+
+
+declare half @llvm.vector.reduce.fmul.v2f16(half, <2 x half>)
+declare half @llvm.vector.reduce.fmul.v4f16(half, <4 x half>)
+declare half @llvm.vector.reduce.fmul.v8f16(half, <8 x half>)
+declare half @llvm.vector.reduce.fmul.v16f16(half, <16 x half>)
+declare float @llvm.vector.reduce.fmul.v2f32(float, <2 x float>)
+declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>)
+declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>)
+declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>)
+declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
+declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>)