[llvm] 3b0e120 - [RISCV] Add tests for @llvm.vector.reduce.fmul. NFC

Thu Oct 3 23:37:08 PDT 2024

Author: Luke Lau
Date: 2024-10-04T14:27:45+08:00
New Revision: 3b0e120336846f450d9dd46936934be7a860f58d

URL: https://github.com/llvm/llvm-project/commit/3b0e120336846f450d9dd46936934be7a860f58d
DIFF: https://github.com/llvm/llvm-project/commit/3b0e120336846f450d9dd46936934be7a860f58d.diff

LOG: [RISCV] Add tests for @llvm.vector.reduce.fmul. NFC

Added: 
    llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
new file mode 100644
index 00000000000000..da1336aa724c95

--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s  --check-prefix=SIZE
+
+define void @reduce_fmul_half() {
+; FP-REDUCE-ZVFH-LABEL: 'reduce_fmul_half'
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 151 for instruction: %V32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half'
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %V32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'reduce_fmul_half'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; fmul reductions carrying the 'fast' flag over <N x half> for N = 1, 2, 4, ..., 128; each call uses a 0.0 start value and an undef vector operand.
+  %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
+  %V2 = call fast half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
+  %V4 = call fast half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
+  %V8 = call fast half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
+  %V16 = call fast half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
+  %V32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
+  %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
+  %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
+  ret void
+}
+
+define void @reduce_fmul_float() {
+; FP-REDUCE-LABEL: 'reduce_fmul_float'
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 121 for instruction: %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 451 for instruction: %V32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 483 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 547 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'reduce_fmul_float'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; fmul reductions carrying the 'fast' flag over <N x float> for N = 1, 2, 4, ..., 128; each call uses a 0.0 start value and an undef vector operand.
+  %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
+  %V2 = call fast float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
+  %V4 = call fast float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
+  %V8 = call fast float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
+  %V16 = call fast float @llvm.vector.reduce.fmul.v16f32(float 0.0, <16 x float> undef)
+  %V32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
+  %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
+  %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
+  ret void
+}
+
+define void @reduce_fmul_double() {
+; FP-REDUCE-LABEL: 'reduce_fmul_double'
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 91 for instruction: %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 361 for instruction: %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %V32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 457 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 585 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'reduce_fmul_double'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; fmul reductions carrying the 'fast' flag over <N x double> for N = 1, 2, 4, ..., 128; each call uses a 0.0 start value and an undef vector operand.
+  %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
+  %V2 = call fast double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
+  %V4 = call fast double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
+  %V8 = call fast double @llvm.vector.reduce.fmul.v8f64(double 0.0, <8 x double> undef)
+  %V16 = call fast double @llvm.vector.reduce.fmul.v16f64(double 0.0, <16 x double> undef)
+  %V32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
+  %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
+  %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
+  ret void
+}
+
+define void @reduce_ordered_fmul_half() {
+; FP-REDUCE-LABEL: 'reduce_ordered_fmul_half'
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'reduce_ordered_fmul_half'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; fmul reductions without fast-math flags (the "ordered" form per the function name) over <N x half> for N = 1, 2, 4, ..., 128; each call uses a 0.0 start value and an undef vector operand.
+  %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
+  %V2 = call half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef)
+  %V4 = call half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef)
+  %V8 = call half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef)
+  %V16 = call half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef)
+  %V32 = call half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
+  %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
+  %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
+  ret void
+}
+
+define void @reduce_ordered_fmul_float() {
+; FP-REDUCE-LABEL: 'reduce_ordered_fmul_float'
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'reduce_ordered_fmul_float'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 380 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; fmul reductions without fast-math flags (the "ordered" form per the function name) over <N x float> for N = 1, 2, 4, ..., 128; each call uses a 0.0 start value and an undef vector operand.
+  %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
+  %V2 = call float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef)
+  %V4 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef)
+  %V8 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef)
+  %V16 = call float @llvm.vector.reduce.fmul.v16f32(float 0.0, <16 x float> undef)
+  %V32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
+  %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
+  %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
+  ret void
+}
+
+define void @reduce_ordered_fmul_double() {
+; FP-REDUCE-LABEL: 'reduce_ordered_fmul_double'
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %V32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SIZE-LABEL: 'reduce_ordered_fmul_double'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 376 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; fmul reductions without fast-math flags (the "ordered" form per the function name) over <N x double> for N = 1, 2, 4, ..., 128; each call uses a 0.0 start value and an undef vector operand.
+  %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
+  %V2 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef)
+  %V4 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef)
+  %V8 = call double @llvm.vector.reduce.fmul.v8f64(double 0.0, <8 x double> undef)
+  %V16 = call double @llvm.vector.reduce.fmul.v16f64(double 0.0, <16 x double> undef)
+  %V32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
+  %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
+  %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
+  ret void
+}