[llvm] a100f63 - [RISCV] Add FP cost model tests for no zfhmin/zfbfmin. NFC

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 27 20:28:05 PDT 2025


Author: Luke Lau
Date: 2025-07-28T11:27:30+08:00
New Revision: a100f6367205c6a909d68027af6a8675a8091bd9

URL: https://github.com/llvm/llvm-project/commit/a100f6367205c6a909d68027af6a8675a8091bd9
DIFF: https://github.com/llvm/llvm-project/commit/a100f6367205c6a909d68027af6a8675a8091bd9.diff

LOG: [RISCV] Add FP cost model tests for no zfhmin/zfbfmin. NFC

Vector costs without zvfhmin/zvfbfmin and zfhmin/zfbfmin are somehow
cheaper than with zvfhmin/zvfbfmin at smaller vector sizes, despite the
fact that the former are scalarized to libcalls. This adds a RUN line to
showcase this, splitting out the bfloat tests into their own functions
so we don't have duplicate lines for the regular float/double costs.

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/RISCV/arith-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
index c6f1f173f8dc0..673bf38d44876 100644
--- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
@@ -1,24 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d < %s | FileCheck %s --check-prefixes=CHECK,NO-ZFHMIN
 ; Check that we don't crash querying costs when vectors are not enabled.
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64
 
 define void @fadd() {
 ; CHECK-LABEL: 'fadd'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fadd double poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fadd <1 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fadd <2 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fadd <4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fadd <8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fadd <16 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> poison, poison
@@ -37,11 +27,6 @@ define void @fadd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fadd <vscale x 2 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fadd <vscale x 4 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fadd <vscale x 8 x double> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fadd.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
@@ -51,11 +36,6 @@ define void @fadd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -67,22 +47,9 @@ define void @fadd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %BF16 = fadd bfloat poison, poison
   %F32 = fadd float poison, poison
   %F64 = fadd double poison, poison
 
-  %V1BF16 = fadd <1 x bfloat> poison, poison
-  %V2BF16 = fadd <2 x bfloat> poison, poison
-  %V4BF16 = fadd <4 x bfloat> poison, poison
-  %V8BF16 = fadd <8 x bfloat> poison, poison
-  %V16BF16 = fadd <16 x bfloat> poison, poison
-
-  %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison
-  %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison
-  %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
-  %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
-  %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
-
   %V1F32 = fadd <1 x float> poison, poison
   %V2F32 = fadd <2 x float> poison, poison
   %V4F32 = fadd <4 x float> poison, poison
@@ -105,12 +72,6 @@ define void @fadd() {
   %NXV4F64 = fadd <vscale x 4 x double> poison, poison
   %NXV8F64 = fadd <vscale x 8 x double> poison, poison
 
-  %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-  %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-  %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-  %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-  %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
-
   %V1F32_VP = call <1 x float> @llvm.vp.fadd(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
   %V2F32_VP = call <2 x float> @llvm.vp.fadd(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
   %V4F32_VP = call <4 x float> @llvm.vp.fadd(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
@@ -122,12 +83,6 @@ define void @fadd() {
   %V4F64_VP = call <4 x double> @llvm.vp.fadd(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
   %V8F64_VP = call <8 x double> @llvm.vp.fadd(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
 
-  %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-  %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-  %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-  %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-  %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
-
   %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fadd(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
   %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fadd(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
   %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fadd(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -142,6 +97,108 @@ define void @fadd() {
   ret void
 }
 
+define void @fadd_bf16() {
+; ZVFH-LABEL: 'fadd_bf16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fadd <1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fadd <2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fadd <4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fadd <8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fadd <16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fadd_bf16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fadd <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fadd <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fadd <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fadd <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fadd <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fadd_bf16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fadd bfloat poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fadd <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fadd <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fadd <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fadd <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fadd <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %BF16 = fadd bfloat poison, poison
+
+  %V1BF16 = fadd <1 x bfloat> poison, poison
+  %V2BF16 = fadd <2 x bfloat> poison, poison
+  %V4BF16 = fadd <4 x bfloat> poison, poison
+  %V8BF16 = fadd <8 x bfloat> poison, poison
+  %V16BF16 = fadd <16 x bfloat> poison, poison
+
+  %NXV1BF16 = fadd <vscale x 1 x bfloat> poison, poison
+  %NXV2BF16 = fadd <vscale x 2 x bfloat> poison, poison
+  %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+  %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+  %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+
+  %V1BF16_VP = call <1 x bfloat> @llvm.vp.fadd(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+  %V2BF16_VP = call <2 x bfloat> @llvm.vp.fadd(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+  %V4BF16_VP = call <4 x bfloat> @llvm.vp.fadd(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+  %V8BF16_VP = call <8 x bfloat> @llvm.vp.fadd(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+  %V16BF16_VP = call <16 x bfloat> @llvm.vp.fadd(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+  %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fadd(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+  %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fadd(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+  %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fadd(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+  %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fadd(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+  %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fadd(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+
+  ret void
+}
+
 define void @fadd_f16() {
 ; ZVFH-LABEL: 'fadd_f16'
 ; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half poison, poison
@@ -194,6 +251,32 @@ define void @fadd_f16() {
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fadd_f16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fadd <2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fadd <4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fadd <8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fadd <16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fadd <32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = fadd <vscale x 1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = fadd <vscale x 2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = fadd <vscale x 4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = fadd <vscale x 8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = fadd <vscale x 16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = fadd <vscale x 32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %F16 = fadd half poison, poison
 
@@ -228,19 +311,8 @@ define void @fadd_f16() {
 
 define void @fsub() {
 ; CHECK-LABEL: 'fsub'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fsub <1 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fsub <2 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fsub <4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fsub <8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fsub <16 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> poison, poison
@@ -259,11 +331,6 @@ define void @fsub() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fsub <vscale x 2 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fsub <vscale x 4 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fsub <vscale x 8 x double> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fsub.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
@@ -273,11 +340,6 @@ define void @fsub() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -289,22 +351,9 @@ define void @fsub() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = fsub half poison, poison
   %F32 = fsub float poison, poison
   %F64 = fsub double poison, poison
 
-  %V1BF16 = fsub <1 x bfloat> poison, poison
-  %V2BF16 = fsub <2 x bfloat> poison, poison
-  %V4BF16 = fsub <4 x bfloat> poison, poison
-  %V8BF16 = fsub <8 x bfloat> poison, poison
-  %V16BF16 = fsub <16 x bfloat> poison, poison
-
-  %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison
-  %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison
-  %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
-  %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
-  %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
-
   %V1F32 = fsub <1 x float> poison, poison
   %V2F32 = fsub <2 x float> poison, poison
   %V4F32 = fsub <4 x float> poison, poison
@@ -327,12 +376,6 @@ define void @fsub() {
   %NXV4F64 = fsub <vscale x 4 x double> poison, poison
   %NXV8F64 = fsub <vscale x 8 x double> poison, poison
 
-  %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-  %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-  %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-  %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-  %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
-
   %V1F32_VP = call <1 x float> @llvm.vp.fsub(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
   %V2F32_VP = call <2 x float> @llvm.vp.fsub(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
   %V4F32_VP = call <4 x float> @llvm.vp.fsub(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
@@ -344,12 +387,6 @@ define void @fsub() {
   %V4F64_VP = call <4 x double> @llvm.vp.fsub(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
   %V8F64_VP = call <8 x double> @llvm.vp.fsub(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
 
-  %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-  %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-  %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-  %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-  %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
-
   %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fsub(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
   %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fsub(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
   %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fsub(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -364,6 +401,116 @@ define void @fsub() {
   ret void
 }
 
+define void @fsub_bf16() {
+; ZVFH-LABEL: 'fsub_bf16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fsub bfloat poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fsub <1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fsub <2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fsub <4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fsub <8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fsub <16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fsub <32 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fsub_bf16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fsub bfloat poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fsub <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fsub <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fsub <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fsub <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fsub <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fsub <32 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fsub_bf16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fsub bfloat poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fsub <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fsub <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fsub <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fsub <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fsub <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fsub <32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %BF16 = fsub bfloat poison, poison
+
+  %V1BF16 = fsub <1 x bfloat> poison, poison
+  %V2BF16 = fsub <2 x bfloat> poison, poison
+  %V4BF16 = fsub <4 x bfloat> poison, poison
+  %V8BF16 = fsub <8 x bfloat> poison, poison
+  %V16BF16 = fsub <16 x bfloat> poison, poison
+  %V32BF16 = fsub <32 x bfloat> poison, poison
+
+  %NXV1BF16 = fsub <vscale x 1 x bfloat> poison, poison
+  %NXV2BF16 = fsub <vscale x 2 x bfloat> poison, poison
+  %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+  %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+  %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+  %NXV32BF16 = fsub <vscale x 32 x bfloat> poison, poison
+
+  %V1BF16_VP = call <1 x bfloat> @llvm.vp.fsub(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+  %V2BF16_VP = call <2 x bfloat> @llvm.vp.fsub(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+  %V4BF16_VP = call <4 x bfloat> @llvm.vp.fsub(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+  %V8BF16_VP = call <8 x bfloat> @llvm.vp.fsub(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+  %V16BF16_VP = call <16 x bfloat> @llvm.vp.fsub(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+  %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fsub(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+  %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fsub(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+  %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fsub(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+  %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fsub(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+  %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fsub(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+
+  ret void
+}
+
 define void @fsub_f16() {
 ; ZVFH-LABEL: 'fsub_f16'
 ; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half poison, poison
@@ -416,6 +563,32 @@ define void @fsub_f16() {
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fsub_f16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fsub <2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fsub <4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fsub <8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fsub <16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fsub <32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = fsub <vscale x 1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = fsub <vscale x 2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = fsub <vscale x 4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = fsub <vscale x 8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = fsub <vscale x 16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = fsub <vscale x 32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fsub.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fsub.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fsub.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fsub.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %F16 = fsub half poison, poison
 
@@ -450,19 +623,8 @@ define void @fsub_f16() {
 
 define void @fmul() {
 ; CHECK-LABEL: 'fmul'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fmul <1 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fmul <2 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fmul <4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fmul <8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fmul <16 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> poison, poison
@@ -481,11 +643,6 @@ define void @fmul() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fmul <vscale x 2 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fmul <vscale x 4 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fmul <vscale x 8 x double> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fmul.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
@@ -495,11 +652,6 @@ define void @fmul() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -511,22 +663,9 @@ define void @fmul() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %BF16 = fmul bfloat poison, poison
   %F32 = fmul float poison, poison
   %F64 = fmul double poison, poison
 
-  %V1BF16 = fmul <1 x bfloat> poison, poison
-  %V2BF16 = fmul <2 x bfloat> poison, poison
-  %V4BF16 = fmul <4 x bfloat> poison, poison
-  %V8BF16 = fmul <8 x bfloat> poison, poison
-  %V16BF16 = fmul <16 x bfloat> poison, poison
-
-  %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison
-  %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison
-  %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
-  %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
-  %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
-
   %V1F32 = fmul <1 x float> poison, poison
   %V2F32 = fmul <2 x float> poison, poison
   %V4F32 = fmul <4 x float> poison, poison
@@ -549,12 +688,6 @@ define void @fmul() {
   %NXV4F64 = fmul <vscale x 4 x double> poison, poison
   %NXV8F64 = fmul <vscale x 8 x double> poison, poison
 
-  %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-  %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-  %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-  %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-  %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
-
   %V1F32_VP = call <1 x float> @llvm.vp.fmul(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
   %V2F32_VP = call <2 x float> @llvm.vp.fmul(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
   %V4F32_VP = call <4 x float> @llvm.vp.fmul(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
@@ -566,12 +699,6 @@ define void @fmul() {
   %V4F64_VP = call <4 x double> @llvm.vp.fmul(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
   %V8F64_VP = call <8 x double> @llvm.vp.fmul(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
 
-  %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-  %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-  %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-  %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-  %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
-
   %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fmul(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
   %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fmul(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
   %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fmul(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -586,6 +713,116 @@ define void @fmul() {
   ret void
 }
 
+define void @fmul_bf16() {
+; ZVFH-LABEL: 'fmul_bf16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fmul <1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fmul <2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fmul <4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fmul <8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fmul <16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fmul <32 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fmul_bf16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fmul <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fmul <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fmul <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fmul <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fmul <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fmul <32 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fmul_bf16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fmul bfloat poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fmul <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fmul <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fmul <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fmul <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fmul <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fmul <32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %BF16 = fmul bfloat poison, poison
+
+  %V1BF16 = fmul <1 x bfloat> poison, poison
+  %V2BF16 = fmul <2 x bfloat> poison, poison
+  %V4BF16 = fmul <4 x bfloat> poison, poison
+  %V8BF16 = fmul <8 x bfloat> poison, poison
+  %V16BF16 = fmul <16 x bfloat> poison, poison
+  %V32BF16 = fmul <32 x bfloat> poison, poison
+
+  %NXV1BF16 = fmul <vscale x 1 x bfloat> poison, poison
+  %NXV2BF16 = fmul <vscale x 2 x bfloat> poison, poison
+  %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+  %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+  %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+  %NXV32BF16 = fmul <vscale x 32 x bfloat> poison, poison
+
+  %V1BF16_VP = call <1 x bfloat> @llvm.vp.fmul(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+  %V2BF16_VP = call <2 x bfloat> @llvm.vp.fmul(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+  %V4BF16_VP = call <4 x bfloat> @llvm.vp.fmul(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+  %V8BF16_VP = call <8 x bfloat> @llvm.vp.fmul(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+  %V16BF16_VP = call <16 x bfloat> @llvm.vp.fmul(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+  %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fmul(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+  %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fmul(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+  %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fmul(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+  %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fmul(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+  %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fmul(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+
+  ret void
+}
+
 define void @fmul_f16() {
 ; ZVFH-LABEL: 'fmul_f16'
 ; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half poison, poison
@@ -638,6 +875,32 @@ define void @fmul_f16() {
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fmul_f16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fmul <2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fmul <4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fmul <8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fmul <16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fmul <32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = fmul <vscale x 1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = fmul <vscale x 2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = fmul <vscale x 4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = fmul <vscale x 8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = fmul <vscale x 16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = fmul <vscale x 32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fmul.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fmul.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fmul.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fmul.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %F16 = fmul half poison, poison
 
@@ -672,19 +935,8 @@ define void @fmul_f16() {
 
 define void @fdiv() {
 ; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fdiv float poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fdiv double poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fdiv <1 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fdiv <2 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fdiv <4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fdiv <8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fdiv <16 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> poison, poison
@@ -703,11 +955,6 @@ define void @fdiv() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fdiv <vscale x 2 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fdiv <vscale x 4 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fdiv <vscale x 8 x double> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fdiv.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
@@ -717,11 +964,6 @@ define void @fdiv() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -733,22 +975,9 @@ define void @fdiv() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %BF16 = fdiv bfloat poison, poison
   %F32 = fdiv float poison, poison
   %F64 = fdiv double poison, poison
 
-  %V1BF16 = fdiv <1 x bfloat> poison, poison
-  %V2BF16 = fdiv <2 x bfloat> poison, poison
-  %V4BF16 = fdiv <4 x bfloat> poison, poison
-  %V8BF16 = fdiv <8 x bfloat> poison, poison
-  %V16BF16 = fdiv <16 x bfloat> poison, poison
-
-  %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison
-  %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison
-  %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison
-  %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison
-  %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison
-
   %V1F32 = fdiv <1 x float> poison, poison
   %V2F32 = fdiv <2 x float> poison, poison
   %V4F32 = fdiv <4 x float> poison, poison
@@ -771,12 +1000,6 @@ define void @fdiv() {
   %NXV4F64 = fdiv <vscale x 4 x double> poison, poison
   %NXV8F64 = fdiv <vscale x 8 x double> poison, poison
 
-  %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-  %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-  %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-  %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-  %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
-
   %V1F32_VP = call <1 x float> @llvm.vp.fdiv(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
   %V2F32_VP = call <2 x float> @llvm.vp.fdiv(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
   %V4F32_VP = call <4 x float> @llvm.vp.fdiv(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
@@ -788,11 +1011,6 @@ define void @fdiv() {
   %V4F64_VP = call <4 x double> @llvm.vp.fdiv(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
   %V8F64_VP = call <8 x double> @llvm.vp.fdiv(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
 
-  %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-  %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-  %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-  %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-  %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
 
   %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fdiv(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
   %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fdiv(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
@@ -808,6 +1026,116 @@ define void @fdiv() {
   ret void
 }
 
+define void @fdiv_bf16() {
+; ZVFH-LABEL: 'fdiv_bf16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fdiv <1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fdiv <2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fdiv <4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fdiv <8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fdiv <16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fdiv <32 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fdiv_bf16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fdiv <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fdiv <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fdiv <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fdiv <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fdiv <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32BF16 = fdiv <32 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fdiv_bf16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fdiv bfloat poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fdiv <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fdiv <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fdiv <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fdiv <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fdiv <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fdiv <32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %BF16 = fdiv bfloat poison, poison
+
+  %V1BF16 = fdiv <1 x bfloat> poison, poison
+  %V2BF16 = fdiv <2 x bfloat> poison, poison
+  %V4BF16 = fdiv <4 x bfloat> poison, poison
+  %V8BF16 = fdiv <8 x bfloat> poison, poison
+  %V16BF16 = fdiv <16 x bfloat> poison, poison
+  %V32BF16 = fdiv <32 x bfloat> poison, poison
+
+  %NXV1BF16 = fdiv <vscale x 1 x bfloat> poison, poison
+  %NXV2BF16 = fdiv <vscale x 2 x bfloat> poison, poison
+  %NXV4BF16 = fdiv <vscale x 4 x bfloat> poison, poison
+  %NXV8BF16 = fdiv <vscale x 8 x bfloat> poison, poison
+  %NXV16BF16 = fdiv <vscale x 16 x bfloat> poison, poison
+  %NXV32BF16 = fdiv <vscale x 32 x bfloat> poison, poison
+
+  %V1BF16_VP = call <1 x bfloat> @llvm.vp.fdiv(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+  %V2BF16_VP = call <2 x bfloat> @llvm.vp.fdiv(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+  %V4BF16_VP = call <4 x bfloat> @llvm.vp.fdiv(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+  %V8BF16_VP = call <8 x bfloat> @llvm.vp.fdiv(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+  %V16BF16_VP = call <16 x bfloat> @llvm.vp.fdiv(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+  %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fdiv(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+  %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fdiv(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+  %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fdiv(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+  %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fdiv(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+  %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fdiv(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+
+  ret void
+}
+
 define void @fdiv_f16() {
 ; ZVFH-LABEL: 'fdiv_f16'
 ; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half poison, poison
@@ -860,6 +1188,32 @@ define void @fdiv_f16() {
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fdiv_f16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fdiv <2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fdiv <4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fdiv <8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fdiv <16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fdiv <32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = fdiv <vscale x 1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = fdiv <vscale x 2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = fdiv <vscale x 4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = fdiv <vscale x 8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = fdiv <vscale x 16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = fdiv <vscale x 32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fdiv.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fdiv.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fdiv.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fdiv.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %F16 = fdiv half poison, poison
 
@@ -894,19 +1248,8 @@ define void @fdiv_f16() {
 
 define void @frem() {
 ; CHECK-LABEL: 'frem'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = frem <1 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = frem <2 x float> poison, poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = frem <4 x float> poison, poison
@@ -925,11 +1268,6 @@ define void @frem() {
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F64 = frem <vscale x 2 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F64 = frem <vscale x 4 x double> poison, poison
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F64 = frem <vscale x 8 x double> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.frem.v1f32(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.frem.v2f32(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
@@ -939,11 +1277,6 @@ define void @frem() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2F64_VP = call <2 x double> @llvm.vp.frem.v2f64(<2 x double> poison, <2 x double> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.frem.v4f64(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.frem.v8f64(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -955,22 +1288,9 @@ define void @frem() {
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.frem.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %BF16 = frem bfloat poison, poison
   %F32 = frem float poison, poison
   %F64 = frem double poison, poison
 
-  %V1BF16 = frem <1 x bfloat> poison, poison
-  %V2BF16 = frem <2 x bfloat> poison, poison
-  %V4BF16 = frem <4 x bfloat> poison, poison
-  %V8BF16 = frem <8 x bfloat> poison, poison
-  %V16BF16 = frem <16 x bfloat> poison, poison
-
-  %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
-  %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
-  %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
-  %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
-  %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
-
   %V1F32 = frem <1 x float> poison, poison
   %V2F32 = frem <2 x float> poison, poison
   %V4F32 = frem <4 x float> poison, poison
@@ -993,12 +1313,6 @@ define void @frem() {
   %NXV4F64 = frem <vscale x 4 x double> poison, poison
   %NXV8F64 = frem <vscale x 8 x double> poison, poison
 
-  %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
-  %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
-  %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
-  %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
-  %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
-
   %V1F32_VP = call <1 x float> @llvm.vp.frem(<1 x float> poison, <1 x float> poison, <1 x i1> poison, i32 poison)
   %V2F32_VP = call <2 x float> @llvm.vp.frem(<2 x float> poison, <2 x float> poison, <2 x i1> poison, i32 poison)
   %V4F32_VP = call <4 x float> @llvm.vp.frem(<4 x float> poison, <4 x float> poison, <4 x i1> poison, i32 poison)
@@ -1010,12 +1324,6 @@ define void @frem() {
   %V4F64_VP = call <4 x double> @llvm.vp.frem(<4 x double> poison, <4 x double> poison, <4 x i1> poison, i32 poison)
   %V8F64_VP = call <8 x double> @llvm.vp.frem(<8 x double> poison, <8 x double> poison, <8 x i1> poison, i32 poison)
 
-  %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-  %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-  %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-  %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-  %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
-
   %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.frem(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
   %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.frem(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
   %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.frem(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
@@ -1030,22 +1338,194 @@ define void @frem() {
   ret void
 }
 
+define void @frem_bf16() {
+; ZVFH-LABEL: 'frem_bf16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'frem_bf16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'frem_bf16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = frem <1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = frem <2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = frem <4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = frem <8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = frem <16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = frem <32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %BF16 = frem bfloat poison, poison
+
+  %V1BF16 = frem <1 x bfloat> poison, poison
+  %V2BF16 = frem <2 x bfloat> poison, poison
+  %V4BF16 = frem <4 x bfloat> poison, poison
+  %V8BF16 = frem <8 x bfloat> poison, poison
+  %V16BF16 = frem <16 x bfloat> poison, poison
+  %V32BF16 = frem <32 x bfloat> poison, poison
+
+  %NXV1BF16 = frem <vscale x 1 x bfloat> poison, poison
+  %NXV2BF16 = frem <vscale x 2 x bfloat> poison, poison
+  %NXV4BF16 = frem <vscale x 4 x bfloat> poison, poison
+  %NXV8BF16 = frem <vscale x 8 x bfloat> poison, poison
+  %NXV16BF16 = frem <vscale x 16 x bfloat> poison, poison
+  %NXV32BF16 = frem <vscale x 32 x bfloat> poison, poison
+
+  %V1BF16_VP = call <1 x bfloat> @llvm.vp.frem(<1 x bfloat> poison, <1 x bfloat> poison, <1 x i1> poison, i32 poison)
+  %V2BF16_VP = call <2 x bfloat> @llvm.vp.frem(<2 x bfloat> poison, <2 x bfloat> poison, <2 x i1> poison, i32 poison)
+  %V4BF16_VP = call <4 x bfloat> @llvm.vp.frem(<4 x bfloat> poison, <4 x bfloat> poison, <4 x i1> poison, i32 poison)
+  %V8BF16_VP = call <8 x bfloat> @llvm.vp.frem(<8 x bfloat> poison, <8 x bfloat> poison, <8 x i1> poison, i32 poison)
+  %V16BF16_VP = call <16 x bfloat> @llvm.vp.frem(<16 x bfloat> poison, <16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
+  %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.frem(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+  %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.frem(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+  %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.frem(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+  %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.frem(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+  %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.frem(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+
+  ret void
+}
+
 define void @frem_f16() {
-; CHECK-LABEL: 'frem_f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'frem_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'frem_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'frem_f16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = frem <1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = frem <2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = frem <4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = frem <8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = frem <16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = frem <32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> poison, poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.frem.v1f16(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.frem.v2f16(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.frem.v4f16(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.frem.v8f16(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.frem.v16f16(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %F16 = frem half poison, poison
 
@@ -1063,24 +1543,25 @@ define void @frem_f16() {
   %NXV16F16 = frem <vscale x 16 x half> poison, poison
   %NXV32F16 = frem <vscale x 32 x half> poison, poison
 
+  %V1F16_VP = call <1 x half> @llvm.vp.frem(<1 x half> poison, <1 x half> poison, <1 x i1> poison, i32 poison)
+  %V2F16_VP = call <2 x half> @llvm.vp.frem(<2 x half> poison, <2 x half> poison, <2 x i1> poison, i32 poison)
+  %V4F16_VP = call <4 x half> @llvm.vp.frem(<4 x half> poison, <4 x half> poison, <4 x i1> poison, i32 poison)
+  %V8F16_VP = call <8 x half> @llvm.vp.frem(<8 x half> poison, <8 x half> poison, <8 x i1> poison, i32 poison)
+  %V16F16_VP = call <16 x half> @llvm.vp.frem(<16 x half> poison, <16 x half> poison, <16 x i1> poison, i32 poison)
+
+  %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.frem(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+  %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.frem(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+  %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.frem(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+  %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.frem(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+  %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.frem(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+
   ret void
 }
 
 define void @fneg() {
 ; CHECK-LABEL: 'fneg'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fneg half poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2BF16 = fneg <2 x bfloat> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fneg <4 x bfloat> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fneg <8 x bfloat> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fneg <16 x bfloat> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fneg <1 x float> poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <2 x float> poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> poison
@@ -1099,11 +1580,6 @@ define void @fneg() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fneg <vscale x 2 x double> poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fneg <vscale x 4 x double> poison
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fneg <vscale x 8 x double> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> poison, <1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32_VP = call <1 x float> @llvm.vp.fneg.v1f32(<1 x float> poison, <1 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32_VP = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> poison, <2 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32_VP = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> poison, <4 x i1> poison, i32 poison)
@@ -1114,39 +1590,21 @@ define void @fneg() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64_VP = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> poison, <4 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64_VP = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> poison, <8 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F64_VP = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> poison, <16 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF32_VP = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF32_VP = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV4BF32_VP = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV8BF32_VP = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV16BF32_VP = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF64_VP = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2BF64_VP = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4BF64_VP = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8BF64_VP = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NXV16BF64_VP = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %NXV16F64_VP = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %BF16 = fneg half poison
   %F32 = fneg float poison
   %F64 = fneg double poison
 
-  %V1BF16 = fneg <1 x bfloat> poison
-  %V2BF16 = fneg <2 x bfloat> poison
-  %V4BF16 = fneg <4 x bfloat> poison
-  %V8BF16 = fneg <8 x bfloat> poison
-  %V16BF16 = fneg <16 x bfloat> poison
-
-  %NXV1BF16 = fneg <vscale x 1 x bfloat> poison
-  %NXV2BF16 = fneg <vscale x 2 x bfloat> poison
-  %NXV4BF16 = fneg <vscale x 4 x bfloat> poison
-  %NXV8BF16 = fneg <vscale x 8 x bfloat> poison
-  %NXV16BF16 = fneg <vscale x 16 x bfloat> poison
-
   %V1F32 = fneg <1 x float> poison
   %V2F32 = fneg <2 x float> poison
   %V4F32 = fneg <4 x float> poison
@@ -1169,12 +1627,6 @@ define void @fneg() {
   %NXV4F64 = fneg <vscale x 4 x double> poison
   %NXV8F64 = fneg <vscale x 8 x double> poison
 
-  %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg(<1 x bfloat> poison, <1 x i1> poison, i32 poison)
-  %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
-  %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
-  %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
-  %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
-
   %V1F32_VP = call <1 x float> @llvm.vp.fneg(<1 x float> poison, <1 x i1> poison, i32 poison)
   %V2F32_VP = call <2 x float> @llvm.vp.fneg(<2 x float> poison, <2 x i1> poison, i32 poison)
   %V4F32_VP = call <4 x float> @llvm.vp.fneg(<4 x float> poison, <4 x i1> poison, i32 poison)
@@ -1187,43 +1639,209 @@ define void @fneg() {
   %V8F64_VP = call <8 x double> @llvm.vp.fneg(<8 x double> poison, <8 x i1> poison, i32 poison)
   %V16F64_VP = call <16 x double> @llvm.vp.fneg(<16 x double> poison, <16 x i1> poison, i32 poison)
 
+  %NXV1F32_VP = call <vscale x 1 x float> @llvm.vp.fneg(<vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
+  %NXV2F32_VP = call <vscale x 2 x float> @llvm.vp.fneg(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
+  %NXV4F32_VP = call <vscale x 4 x float> @llvm.vp.fneg(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
+  %NXV8F32_VP = call <vscale x 8 x float> @llvm.vp.fneg(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
+  %NXV16F32_VP = call <vscale x 16 x float> @llvm.vp.fneg(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
+
+  %NXV1F64_VP = call <vscale x 1 x double> @llvm.vp.fneg(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
+  %NXV2F64_VP = call <vscale x 2 x double> @llvm.vp.fneg(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
+  %NXV4F64_VP = call <vscale x 4 x double> @llvm.vp.fneg(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
+  %NXV8F64_VP = call <vscale x 8 x double> @llvm.vp.fneg(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
+  %NXV16F64_VP = call <vscale x 16 x double> @llvm.vp.fneg(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)
+
+  ret void
+}
+
+define void @fneg_bf16() {
+; ZVFH-LABEL: 'fneg_bf16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fneg bfloat poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2BF16 = fneg <2 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fneg <4 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fneg <8 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fneg <16 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32BF16 = fneg <32 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fneg <vscale x 32 x bfloat> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fneg_bf16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fneg bfloat poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2BF16 = fneg <2 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fneg <4 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fneg <8 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fneg <16 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32BF16 = fneg <32 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32BF16 = fneg <vscale x 32 x bfloat> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fneg_bf16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %BF16 = fneg bfloat poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = fneg <1 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = fneg <2 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = fneg <4 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = fneg <8 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = fneg <16 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = fneg <32 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = fneg <vscale x 1 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = fneg <vscale x 2 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = fneg <vscale x 4 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = fneg <vscale x 8 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = fneg <vscale x 16 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = fneg <vscale x 32 x bfloat> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg.v1bf16(<1 x bfloat> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %BF16 = fneg bfloat poison
+
+  %V1BF16 = fneg <1 x bfloat> poison
+  %V2BF16 = fneg <2 x bfloat> poison
+  %V4BF16 = fneg <4 x bfloat> poison
+  %V8BF16 = fneg <8 x bfloat> poison
+  %V16BF16 = fneg <16 x bfloat> poison
+  %V32BF16 = fneg <32 x bfloat> poison
+
+  %NXV1BF16 = fneg <vscale x 1 x bfloat> poison
+  %NXV2BF16 = fneg <vscale x 2 x bfloat> poison
+  %NXV4BF16 = fneg <vscale x 4 x bfloat> poison
+  %NXV8BF16 = fneg <vscale x 8 x bfloat> poison
+  %NXV16BF16 = fneg <vscale x 16 x bfloat> poison
+  %NXV32BF16 = fneg <vscale x 32 x bfloat> poison
+
+  %V1BF16_VP = call <1 x bfloat> @llvm.vp.fneg(<1 x bfloat> poison, <1 x i1> poison, i32 poison)
+  %V2BF16_VP = call <2 x bfloat> @llvm.vp.fneg(<2 x bfloat> poison, <2 x i1> poison, i32 poison)
+  %V4BF16_VP = call <4 x bfloat> @llvm.vp.fneg(<4 x bfloat> poison, <4 x i1> poison, i32 poison)
+  %V8BF16_VP = call <8 x bfloat> @llvm.vp.fneg(<8 x bfloat> poison, <8 x i1> poison, i32 poison)
+  %V16BF16_VP = call <16 x bfloat> @llvm.vp.fneg(<16 x bfloat> poison, <16 x i1> poison, i32 poison)
+
   %NXV1BF16_VP = call <vscale x 1 x bfloat> @llvm.vp.fneg(<vscale x 1 x bfloat> poison, <vscale x 1 x i1> poison, i32 poison)
   %NXV2BF16_VP = call <vscale x 2 x bfloat> @llvm.vp.fneg(<vscale x 2 x bfloat> poison, <vscale x 2 x i1> poison, i32 poison)
   %NXV4BF16_VP = call <vscale x 4 x bfloat> @llvm.vp.fneg(<vscale x 4 x bfloat> poison, <vscale x 4 x i1> poison, i32 poison)
   %NXV8BF16_VP = call <vscale x 8 x bfloat> @llvm.vp.fneg(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> poison, i32 poison)
   %NXV16BF16_VP = call <vscale x 16 x bfloat> @llvm.vp.fneg(<vscale x 16 x bfloat> poison, <vscale x 16 x i1> poison, i32 poison)
 
-  %NXV1BF32_VP = call <vscale x 1 x float> @llvm.vp.fneg(<vscale x 1 x float> poison, <vscale x 1 x i1> poison, i32 poison)
-  %NXV2BF32_VP = call <vscale x 2 x float> @llvm.vp.fneg(<vscale x 2 x float> poison, <vscale x 2 x i1> poison, i32 poison)
-  %NXV4BF32_VP = call <vscale x 4 x float> @llvm.vp.fneg(<vscale x 4 x float> poison, <vscale x 4 x i1> poison, i32 poison)
-  %NXV8BF32_VP = call <vscale x 8 x float> @llvm.vp.fneg(<vscale x 8 x float> poison, <vscale x 8 x i1> poison, i32 poison)
-  %NXV16BF32_VP = call <vscale x 16 x float> @llvm.vp.fneg(<vscale x 16 x float> poison, <vscale x 16 x i1> poison, i32 poison)
-
-  %NXV1BF64_VP = call <vscale x 1 x double> @llvm.vp.fneg(<vscale x 1 x double> poison, <vscale x 1 x i1> poison, i32 poison)
-  %NXV2BF64_VP = call <vscale x 2 x double> @llvm.vp.fneg(<vscale x 2 x double> poison, <vscale x 2 x i1> poison, i32 poison)
-  %NXV4BF64_VP = call <vscale x 4 x double> @llvm.vp.fneg(<vscale x 4 x double> poison, <vscale x 4 x i1> poison, i32 poison)
-  %NXV8BF64_VP = call <vscale x 8 x double> @llvm.vp.fneg(<vscale x 8 x double> poison, <vscale x 8 x i1> poison, i32 poison)
-  %NXV16BF64_VP = call <vscale x 16 x double> @llvm.vp.fneg(<vscale x 16 x double> poison, <vscale x 16 x i1> poison, i32 poison)
-
   ret void
 }
 
 define void @fneg_f16() {
-; CHECK-LABEL: 'fneg_f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg <vscale x 32 x half> poison
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'fneg_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg <vscale x 32 x half> poison
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fneg.v1f16(<1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fneg_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg <vscale x 32 x half> poison
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fneg.v1f16(<1 x half> poison, <1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fneg_f16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = fneg <2 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = fneg <4 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = fneg <8 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = fneg <16 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = fneg <32 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = fneg <vscale x 1 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = fneg <vscale x 2 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = fneg <vscale x 4 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = fneg <vscale x 8 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = fneg <vscale x 16 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = fneg <vscale x 32 x half> poison
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fneg.v1f16(<1 x half> poison, <1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> poison, <2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16_VP = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> poison, <4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16_VP = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> poison, <8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16_VP = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> poison, <16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %F16 = fneg half poison
 
@@ -1241,24 +1859,25 @@ define void @fneg_f16() {
   %NXV16F16 = fneg <vscale x 16 x half> poison
   %NXV32F16 = fneg <vscale x 32 x half> poison
 
+  %V1F16_VP = call <1 x half> @llvm.vp.fneg(<1 x half> poison, <1 x i1> poison, i32 poison)
+  %V2F16_VP = call <2 x half> @llvm.vp.fneg(<2 x half> poison, <2 x i1> poison, i32 poison)
+  %V4F16_VP = call <4 x half> @llvm.vp.fneg(<4 x half> poison, <4 x i1> poison, i32 poison)
+  %V8F16_VP = call <8 x half> @llvm.vp.fneg(<8 x half> poison, <8 x i1> poison, i32 poison)
+  %V16F16_VP = call <16 x half> @llvm.vp.fneg(<16 x half> poison, <16 x i1> poison, i32 poison)
+
+  %NXV1F16_VP = call <vscale x 1 x half> @llvm.vp.fneg(<vscale x 1 x half> poison, <vscale x 1 x i1> poison, i32 poison)
+  %NXV2F16_VP = call <vscale x 2 x half> @llvm.vp.fneg(<vscale x 2 x half> poison, <vscale x 2 x i1> poison, i32 poison)
+  %NXV4F16_VP = call <vscale x 4 x half> @llvm.vp.fneg(<vscale x 4 x half> poison, <vscale x 4 x i1> poison, i32 poison)
+  %NXV8F16_VP = call <vscale x 8 x half> @llvm.vp.fneg(<vscale x 8 x half> poison, <vscale x 8 x i1> poison, i32 poison)
+  %NXV16F16_VP = call <vscale x 16 x half> @llvm.vp.fneg(<vscale x 16 x half> poison, <vscale x 16 x i1> poison, i32 poison)
+
   ret void
 }
 
 define void @fcopysign() {
 ; CHECK-LABEL: 'fcopysign'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float poison, float poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double poison, double poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.copysign.v1f32(<1 x float> poison, <1 x float> poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> poison, <2 x float> poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> poison, <4 x float> poison)
@@ -1279,22 +1898,9 @@ define void @fcopysign() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison)
   %F32 = call float @llvm.copysign.f32(float poison, float poison)
   %F64 = call double @llvm.copysign.f64(double poison, double poison)
 
-  %V1BF16 = call <1 x bfloat> @llvm.copysign(<1 x bfloat> poison, <1 x bfloat> poison)
-  %V2BF16 = call <2 x bfloat> @llvm.copysign(<2 x bfloat> poison, <2 x bfloat> poison)
-  %V4BF16 = call <4 x bfloat> @llvm.copysign(<4 x bfloat> poison, <4 x bfloat> poison)
-  %V8BF16 = call <8 x bfloat> @llvm.copysign(<8 x bfloat> poison, <8 x bfloat> poison)
-  %V16BF16 = call <16 x bfloat> @llvm.copysign(<16 x bfloat> poison, <16 x bfloat> poison)
-
-  %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
-  %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
-  %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
-  %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
-  %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
-
   %V1F32 = call <1 x float> @llvm.copysign(<1 x float> poison, <1 x float> poison)
   %V2F32 = call <2 x float> @llvm.copysign(<2 x float> poison, <2 x float> poison)
   %V4F32 = call <4 x float> @llvm.copysign(<4 x float> poison, <4 x float> poison)
@@ -1320,6 +1926,74 @@ define void @fcopysign() {
   ret void
 }
 
+define void @fcopysign_bf16() {
+; ZVFH-LABEL: 'fcopysign_bf16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V32BF16 = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fcopysign_bf16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V32BF16 = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fcopysign_bf16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat poison, bfloat poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %BF16 = call bfloat @llvm.copysign(bfloat poison, bfloat poison)
+
+  %V1BF16 = call <1 x bfloat> @llvm.copysign(<1 x bfloat> poison, <1 x bfloat> poison)
+  %V2BF16 = call <2 x bfloat> @llvm.copysign(<2 x bfloat> poison, <2 x bfloat> poison)
+  %V4BF16 = call <4 x bfloat> @llvm.copysign(<4 x bfloat> poison, <4 x bfloat> poison)
+  %V8BF16 = call <8 x bfloat> @llvm.copysign(<8 x bfloat> poison, <8 x bfloat> poison)
+  %V16BF16 = call <16 x bfloat> @llvm.copysign(<16 x bfloat> poison, <16 x bfloat> poison)
+  %V32BF16 = call <32 x bfloat> @llvm.copysign(<32 x bfloat> poison, <32 x bfloat> poison)
+
+  %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.copysign(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+  %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.copysign(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+  %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.copysign(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+  %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.copysign(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+  %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.copysign(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+  %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.copysign(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+
+  ret void
+}
+
 define void @fcopysign_f16() {
 ; ZVFH-LABEL: 'fcopysign_f16'
 ; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half poison, half poison)
@@ -1353,7 +2027,23 @@ define void @fcopysign_f16() {
 ; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = call half @llvm.copysign.f16(half poison, half poison)
+; NO-ZFHMIN-LABEL: 'fcopysign_f16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half poison, half poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> poison, <1 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> poison, <2 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> poison, <4 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> poison, <8 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> poison, <16 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> poison, <32 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %F16 = call half @llvm.copysign(half poison, half poison)
 
   %V1F16 = call <1 x half> @llvm.copysign(<1 x half> poison, <1 x half> poison)
   %V2F16 = call <2 x half> @llvm.copysign(<2 x half> poison, <2 x half> poison)
@@ -1374,19 +2064,8 @@ define void @fcopysign_f16() {
 
 define void @fma() {
 ; CHECK-LABEL: 'fma'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float poison, float poison, float poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double poison, double poison, double poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = call <1 x float> @llvm.fma.v1f32(<1 x float> poison, <1 x float> poison, <1 x float> poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> poison, <2 x float> poison, <2 x float> poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison)
@@ -1407,21 +2086,8 @@ define void @fma() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.fma.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison)
-  %F32 = call float @llvm.fma.f32(float poison, float poison, float poison)
-  %F64 = call double @llvm.fma.f64(double poison, double poison, double poison)
-
-  %V1BF16 = call <1 x bfloat> @llvm.fma(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison)
-  %V2BF16 = call <2 x bfloat> @llvm.fma(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
-  %V4BF16 = call <4 x bfloat> @llvm.fma(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
-  %V8BF16 = call <8 x bfloat> @llvm.fma(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
-  %V16BF16 = call <16 x bfloat> @llvm.fma(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
-
-  %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
-  %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
-  %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
-  %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
-  %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+  %F32 = call float @llvm.fma(float poison, float poison, float poison)
+  %F64 = call double @llvm.fma(double poison, double poison, double poison)
 
   %V1F32 = call <1 x float> @llvm.fma(<1 x float> poison, <1 x float> poison, <1 x float> poison)
   %V2F32 = call <2 x float> @llvm.fma(<2 x float> poison, <2 x float> poison, <2 x float> poison)
@@ -1448,24 +2114,124 @@ define void @fma() {
   ret void
 }
 
+define void @fma_bf16() {
+; ZVFH-LABEL: 'fma_bf16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32BF16 = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fma_bf16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32BF16 = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fma_bf16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat poison, bfloat poison, bfloat poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16BF16 = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32BF16 = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma.nxv32bf16(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %BF16 = call bfloat @llvm.fma(bfloat poison, bfloat poison, bfloat poison)
+
+  %V1BF16 = call <1 x bfloat> @llvm.fma(<1 x bfloat> poison, <1 x bfloat> poison, <1 x bfloat> poison)
+  %V2BF16 = call <2 x bfloat> @llvm.fma(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+  %V4BF16 = call <4 x bfloat> @llvm.fma(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+  %V8BF16 = call <8 x bfloat> @llvm.fma(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+  %V16BF16 = call <16 x bfloat> @llvm.fma(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+  %V32BF16 = call <32 x bfloat> @llvm.fma(<32 x bfloat> poison, <32 x bfloat> poison, <32 x bfloat> poison)
+
+  %NXV1BF16 = call <vscale x 1 x bfloat> @llvm.fma(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+  %NXV2BF16 = call <vscale x 2 x bfloat> @llvm.fma(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+  %NXV4BF16 = call <vscale x 4 x bfloat> @llvm.fma(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+  %NXV8BF16 = call <vscale x 8 x bfloat> @llvm.fma(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+  %NXV16BF16 = call <vscale x 16 x bfloat> @llvm.fma(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+  %NXV32BF16 = call <vscale x 32 x bfloat> @llvm.fma(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> poison)
+
+  ret void
+}
+
 define void @fma_f16() {
-; CHECK-LABEL: 'fma_f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half poison, half poison, half poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> poison, <1 x half> poison, <1 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> poison, <32 x half> poison, <32 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'fma_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half poison, half poison, half poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> poison, <1 x half> poison, <1 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> poison, <32 x half> poison, <32 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fma_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half poison, half poison, half poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> poison, <1 x half> poison, <1 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> poison, <32 x half> poison, <32 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = call half @llvm.fma.f16(half poison, half poison, half poison)
+; NO-ZFHMIN-LABEL: 'fma_f16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half poison, half poison, half poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> poison, <1 x half> poison, <1 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> poison, <32 x half> poison, <32 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> poison, <vscale x 32 x half> poison, <vscale x 32 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %F16 = call half @llvm.fma(half poison, half poison, half poison)
 
   %V1F16 = call <1 x half> @llvm.fma(<1 x half> poison, <1 x half> poison, <1 x half> poison)
   %V2F16 = call <2 x half> @llvm.fma(<2 x half> poison, <2 x half> poison, <2 x half> poison)
@@ -1486,46 +2252,30 @@ define void @fma_f16() {
 
 define void @fmuladd() {
 ; CHECK-LABEL: 'fmuladd'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call float @llvm.fmuladd.f32(float poison, float poison, float poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call double @llvm.fmuladd.f64(double poison, double poison, double poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> poison, <2 x float> poison, <2 x float> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> poison, <8 x float> poison, <8 x float> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> poison, <16 x float> poison, <16 x float> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> poison, <2 x double> poison, <2 x double> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> poison, <4 x double> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> poison, <8 x double> poison, <8 x double> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.fmuladd.v16f64(<16 x double> poison, <16 x double> poison, <16 x double> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %25 = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x float> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x double> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %28 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %29 = call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %30 = call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x double> poison, <vscale x 16 x double> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call float @llvm.fmuladd.f32(float poison, float poison, float poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call double @llvm.fmuladd.f64(double poison, double poison, double poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> poison, <2 x float> poison, <2 x float> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> poison, <8 x float> poison, <8 x float> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> poison, <16 x float> poison, <16 x float> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> poison, <2 x double> poison, <2 x double> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> poison, <4 x double> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> poison, <8 x double> poison, <8 x double> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = call <16 x double> @llvm.fmuladd.v16f64(<16 x double> poison, <16 x double> poison, <16 x double> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison, <vscale x 8 x float> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison, <vscale x 16 x float> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> poison, <vscale x 1 x double> poison, <vscale x 1 x double> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison, <vscale x 2 x double> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison, <vscale x 4 x double> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison, <vscale x 8 x double> poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %20 = call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x double> poison, <vscale x 16 x double> poison)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison)
-  call float @llvm.fmuladd.f32(float poison, float poison, float poison)
-  call double @llvm.fmuladd.f64(double poison, double poison, double poison)
-
-  call <2 x bfloat> @llvm.fmuladd(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
-  call <4 x bfloat> @llvm.fmuladd(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
-  call <8 x bfloat> @llvm.fmuladd(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
-  call <16 x bfloat> @llvm.fmuladd(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+  call float @llvm.fmuladd(float poison, float poison, float poison)
+  call double @llvm.fmuladd(double poison, double poison, double poison)
 
   call <2 x float> @llvm.fmuladd(<2 x float> poison, <2 x float> poison, <2 x float> poison)
   call <4 x float> @llvm.fmuladd(<4 x float> poison, <4 x float> poison, <4 x float> poison)
@@ -1537,12 +2287,6 @@ define void @fmuladd() {
   call <8 x double> @llvm.fmuladd(<8 x double> poison, <8 x double> poison, <8 x double> poison)
   call <16 x double> @llvm.fmuladd(<16 x double> poison, <16 x double> poison, <16 x double> poison)
 
-  call <vscale x 1 x bfloat> @llvm.fmuladd(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
-  call <vscale x 2 x bfloat> @llvm.fmuladd(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
-  call <vscale x 4 x bfloat> @llvm.fmuladd(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
-  call <vscale x 8 x bfloat> @llvm.fmuladd(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
-  call <vscale x 16 x bfloat> @llvm.fmuladd(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
-
   call <vscale x 1 x float> @llvm.fmuladd(<vscale x 1 x float> poison, <vscale x 1 x float> poison, <vscale x 1 x float> poison)
   call <vscale x 2 x float> @llvm.fmuladd(<vscale x 2 x float> poison, <vscale x 2 x float> poison, <vscale x 2 x float> poison)
   call <vscale x 4 x float> @llvm.fmuladd(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
@@ -1558,21 +2302,102 @@ define void @fmuladd() {
   ret void
 }
 
+define void @fmuladd_bf16() {
+; ZVFH-LABEL: 'fmuladd_bf16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fmuladd_bf16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fmuladd_bf16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fmuladd.bf16(bfloat poison, bfloat poison, bfloat poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call bfloat @llvm.fmuladd(bfloat poison, bfloat poison, bfloat poison)
+
+  call <2 x bfloat> @llvm.fmuladd(<2 x bfloat> poison, <2 x bfloat> poison, <2 x bfloat> poison)
+  call <4 x bfloat> @llvm.fmuladd(<4 x bfloat> poison, <4 x bfloat> poison, <4 x bfloat> poison)
+  call <8 x bfloat> @llvm.fmuladd(<8 x bfloat> poison, <8 x bfloat> poison, <8 x bfloat> poison)
+  call <16 x bfloat> @llvm.fmuladd(<16 x bfloat> poison, <16 x bfloat> poison, <16 x bfloat> poison)
+
+  call <vscale x 1 x bfloat> @llvm.fmuladd(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> poison)
+  call <vscale x 2 x bfloat> @llvm.fmuladd(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+  call <vscale x 4 x bfloat> @llvm.fmuladd(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+  call <vscale x 8 x bfloat> @llvm.fmuladd(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+  call <vscale x 16 x bfloat> @llvm.fmuladd(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+  ret void
+}
+
 define void @fmuladd_f16() {
-; CHECK-LABEL: 'fmuladd_f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half poison, half poison, half poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'fmuladd_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half poison, half poison, half poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fmuladd_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half poison, half poison, half poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; NO-ZFHMIN-LABEL: 'fmuladd_f16'
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half poison, half poison, half poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> poison, <2 x half> poison, <2 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> poison, <4 x half> poison, <4 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> poison, <8 x half> poison, <8 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> poison, <16 x half> poison, <16 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> poison, <vscale x 1 x half> poison, <vscale x 1 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; NO-ZFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  call half @llvm.fmuladd.f16(half poison, half poison, half poison)
+  call half @llvm.fmuladd(half poison, half poison, half poison)
 
   call <2 x half> @llvm.fmuladd(<2 x half> poison, <2 x half> poison, <2 x half> poison)
   call <4 x half> @llvm.fmuladd(<4 x half> poison, <4 x half> poison, <4 x half> poison)


        


More information about the llvm-commits mailing list