[llvm] 89c10e2 - [RISCV] Add zvfhmin cost model test coverage. NFC

Thu Sep 12 03:44:40 PDT 2024

Author: Luke Lau
Date: 2024-09-12T18:41:47+08:00
New Revision: 89c10e27d8b4d5f44998aad9abd2590d9f96c5df

URL: https://github.com/llvm/llvm-project/commit/89c10e27d8b4d5f44998aad9abd2590d9f96c5df
DIFF: https://github.com/llvm/llvm-project/commit/89c10e27d8b4d5f44998aad9abd2590d9f96c5df.diff

LOG: [RISCV] Add zvfhmin cost model test coverage. NFC

This adds tests coverage for zvfhmin and halfs in general in the cost
model tests.

Some existing half tests were split into separate functions so that if
the check prefixes diverge it won't affect the rest of the non-half
instructions.

Whilst we're here, also remove the redundant
-riscv-vector-bits-min=128 and declares.

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
    llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
    llvm/test/Analysis/CostModel/RISCV/fround.ll
    llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
    llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
    llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
    llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll
    llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll
    llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll
    llvm/test/Analysis/CostModel/RISCV/splice.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
index 5236f5a3bae954..e92b2b3c50b898 100644

--- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
@@ -1,25 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-min=128 < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 ; Check that we don't crash querying costs when vectors are not enabled.
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64
 
-define i32 @fadd() {
+define void @fadd() {
 ; CHECK-LABEL: 'fadd'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fadd double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd <vscale x 1 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd <vscale x 2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd <vscale x 4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd <vscale x 16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd <vscale x 32 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef
@@ -38,26 +26,11 @@ define i32 @fadd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fadd <vscale x 2 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fadd <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fadd <vscale x 8 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = fadd half undef, undef
   %F32 = fadd float undef, undef
   %F64 = fadd double undef, undef
 
-  %V1F16 = fadd <1 x half> undef, undef
-  %V2F16 = fadd <2 x half> undef, undef
-  %V4F16 = fadd <4 x half> undef, undef
-  %V8F16 = fadd <8 x half> undef, undef
-  %V16F16 = fadd <16 x half> undef, undef
-  %V32F16 = fadd <32 x half> undef, undef
-
-  %NXV1F16 = fadd <vscale x 1 x half> undef, undef
-  %NXV2F16 = fadd <vscale x 2 x half> undef, undef
-  %NXV4F16 = fadd <vscale x 4 x half> undef, undef
-  %NXV8F16 = fadd <vscale x 8 x half> undef, undef
-  %NXV16F16 = fadd <vscale x 16 x half> undef, undef
-  %NXV32F16 = fadd <vscale x 32 x half> undef, undef
-
   %V1F32 = fadd <1 x float> undef, undef
   %V2F32 = fadd <2 x float> undef, undef
   %V4F32 = fadd <4 x float> undef, undef
@@ -80,26 +53,49 @@ define i32 @fadd() {
   %NXV4F64 = fadd <vscale x 4 x double> undef, undef
   %NXV8F64 = fadd <vscale x 8 x double> undef, undef
 
-  ret i32 undef
+  ret void
 }
 
-define i32 @fsub() {
+define void @fadd_f16() {
+; CHECK-LABEL: 'fadd_f16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd <vscale x 1 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd <vscale x 2 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd <vscale x 32 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %F16 = fadd half undef, undef
+
+  %V1F16 = fadd <1 x half> undef, undef
+  %V2F16 = fadd <2 x half> undef, undef
+  %V4F16 = fadd <4 x half> undef, undef
+  %V8F16 = fadd <8 x half> undef, undef
+  %V16F16 = fadd <16 x half> undef, undef
+  %V32F16 = fadd <32 x half> undef, undef
+
+  %NXV1F16 = fadd <vscale x 1 x half> undef, undef
+  %NXV2F16 = fadd <vscale x 2 x half> undef, undef
+  %NXV4F16 = fadd <vscale x 4 x half> undef, undef
+  %NXV8F16 = fadd <vscale x 8 x half> undef, undef
+  %NXV16F16 = fadd <vscale x 16 x half> undef, undef
+  %NXV32F16 = fadd <vscale x 32 x half> undef, undef
+
+  ret void
+}
+
+define void @fsub() {
 ; CHECK-LABEL: 'fsub'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub <vscale x 1 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub <vscale x 2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub <vscale x 4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub <vscale x 16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef
@@ -118,26 +114,11 @@ define i32 @fsub() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fsub <vscale x 2 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fsub <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fsub <vscale x 8 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = fsub half undef, undef
   %F32 = fsub float undef, undef
   %F64 = fsub double undef, undef
 
-  %V1F16 = fsub <1 x half> undef, undef
-  %V2F16 = fsub <2 x half> undef, undef
-  %V4F16 = fsub <4 x half> undef, undef
-  %V8F16 = fsub <8 x half> undef, undef
-  %V16F16 = fsub <16 x half> undef, undef
-  %V32F16 = fsub <32 x half> undef, undef
-
-  %NXV1F16 = fsub <vscale x 1 x half> undef, undef
-  %NXV2F16 = fsub <vscale x 2 x half> undef, undef
-  %NXV4F16 = fsub <vscale x 4 x half> undef, undef
-  %NXV8F16 = fsub <vscale x 8 x half> undef, undef
-  %NXV16F16 = fsub <vscale x 16 x half> undef, undef
-  %NXV32F16 = fsub <vscale x 32 x half> undef, undef
-
   %V1F32 = fsub <1 x float> undef, undef
   %V2F32 = fsub <2 x float> undef, undef
   %V4F32 = fsub <4 x float> undef, undef
@@ -160,26 +141,49 @@ define i32 @fsub() {
   %NXV4F64 = fsub <vscale x 4 x double> undef, undef
   %NXV8F64 = fsub <vscale x 8 x double> undef, undef
 
-  ret i32 undef
+  ret void
 }
 
-define i32 @fmul() {
+define void @fsub_f16() {
+; CHECK-LABEL: 'fsub_f16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub <vscale x 1 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub <vscale x 2 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %F16 = fsub half undef, undef
+
+  %V1F16 = fsub <1 x half> undef, undef
+  %V2F16 = fsub <2 x half> undef, undef
+  %V4F16 = fsub <4 x half> undef, undef
+  %V8F16 = fsub <8 x half> undef, undef
+  %V16F16 = fsub <16 x half> undef, undef
+  %V32F16 = fsub <32 x half> undef, undef
+
+  %NXV1F16 = fsub <vscale x 1 x half> undef, undef
+  %NXV2F16 = fsub <vscale x 2 x half> undef, undef
+  %NXV4F16 = fsub <vscale x 4 x half> undef, undef
+  %NXV8F16 = fsub <vscale x 8 x half> undef, undef
+  %NXV16F16 = fsub <vscale x 16 x half> undef, undef
+  %NXV32F16 = fsub <vscale x 32 x half> undef, undef
+
+  ret void
+}
+
+define void @fmul() {
 ; CHECK-LABEL: 'fmul'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul <vscale x 1 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul <vscale x 2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul <vscale x 4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul <vscale x 16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef
@@ -198,26 +202,11 @@ define i32 @fmul() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fmul <vscale x 2 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fmul <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fmul <vscale x 8 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = fmul half undef, undef
   %F32 = fmul float undef, undef
   %F64 = fmul double undef, undef
 
-  %V1F16 = fmul <1 x half> undef, undef
-  %V2F16 = fmul <2 x half> undef, undef
-  %V4F16 = fmul <4 x half> undef, undef
-  %V8F16 = fmul <8 x half> undef, undef
-  %V16F16 = fmul <16 x half> undef, undef
-  %V32F16 = fmul <32 x half> undef, undef
-
-  %NXV1F16 = fmul <vscale x 1 x half> undef, undef
-  %NXV2F16 = fmul <vscale x 2 x half> undef, undef
-  %NXV4F16 = fmul <vscale x 4 x half> undef, undef
-  %NXV8F16 = fmul <vscale x 8 x half> undef, undef
-  %NXV16F16 = fmul <vscale x 16 x half> undef, undef
-  %NXV32F16 = fmul <vscale x 32 x half> undef, undef
-
   %V1F32 = fmul <1 x float> undef, undef
   %V2F32 = fmul <2 x float> undef, undef
   %V4F32 = fmul <4 x float> undef, undef
@@ -240,26 +229,49 @@ define i32 @fmul() {
   %NXV4F64 = fmul <vscale x 4 x double> undef, undef
   %NXV8F64 = fmul <vscale x 8 x double> undef, undef
 
-  ret i32 undef
+  ret void
+}
+
+define void @fmul_f16() {
+; CHECK-LABEL: 'fmul_f16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul <vscale x 1 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul <vscale x 2 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %F16 = fmul half undef, undef
+
+  %V1F16 = fmul <1 x half> undef, undef
+  %V2F16 = fmul <2 x half> undef, undef
+  %V4F16 = fmul <4 x half> undef, undef
+  %V8F16 = fmul <8 x half> undef, undef
+  %V16F16 = fmul <16 x half> undef, undef
+  %V32F16 = fmul <32 x half> undef, undef
+
+  %NXV1F16 = fmul <vscale x 1 x half> undef, undef
+  %NXV2F16 = fmul <vscale x 2 x half> undef, undef
+  %NXV4F16 = fmul <vscale x 4 x half> undef, undef
+  %NXV8F16 = fmul <vscale x 8 x half> undef, undef
+  %NXV16F16 = fmul <vscale x 16 x half> undef, undef
+  %NXV32F16 = fmul <vscale x 32 x half> undef, undef
+
+  ret void
 }
 
-define i32 @fdiv() {
+define void @fdiv() {
 ; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fdiv float undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fdiv double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef
@@ -278,26 +290,11 @@ define i32 @fdiv() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fdiv <vscale x 2 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fdiv <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fdiv <vscale x 8 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = fdiv half undef, undef
   %F32 = fdiv float undef, undef
   %F64 = fdiv double undef, undef
 
-  %V1F16 = fdiv <1 x half> undef, undef
-  %V2F16 = fdiv <2 x half> undef, undef
-  %V4F16 = fdiv <4 x half> undef, undef
-  %V8F16 = fdiv <8 x half> undef, undef
-  %V16F16 = fdiv <16 x half> undef, undef
-  %V32F16 = fdiv <32 x half> undef, undef
-
-  %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
-  %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
-  %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
-  %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
-  %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
-  %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
-
   %V1F32 = fdiv <1 x float> undef, undef
   %V2F32 = fdiv <2 x float> undef, undef
   %V4F32 = fdiv <4 x float> undef, undef
@@ -320,26 +317,49 @@ define i32 @fdiv() {
   %NXV4F64 = fdiv <vscale x 4 x double> undef, undef
   %NXV8F64 = fdiv <vscale x 8 x double> undef, undef
 
-  ret i32 undef
+  ret void
+}
+
+define void @fdiv_f16() {
+; CHECK-LABEL: 'fdiv_f16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %F16 = fdiv half undef, undef
+
+  %V1F16 = fdiv <1 x half> undef, undef
+  %V2F16 = fdiv <2 x half> undef, undef
+  %V4F16 = fdiv <4 x half> undef, undef
+  %V8F16 = fdiv <8 x half> undef, undef
+  %V16F16 = fdiv <16 x half> undef, undef
+  %V32F16 = fdiv <32 x half> undef, undef
+
+  %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
+  %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
+  %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
+  %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
+  %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
+  %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
+
+  ret void
 }
 
-define i32 @frem() {
+define void @frem() {
 ; CHECK-LABEL: 'frem'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2F16 = frem <2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4F16 = frem <4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8F16 = frem <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = frem <1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = frem <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = frem <4 x float> undef, undef
@@ -358,26 +378,11 @@ define i32 @frem() {
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F64 = frem <vscale x 2 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F64 = frem <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F64 = frem <vscale x 8 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = frem half undef, undef
   %F32 = frem float undef, undef
   %F64 = frem double undef, undef
 
-  %V1F16 = frem <1 x half> undef, undef
-  %V2F16 = frem <2 x half> undef, undef
-  %V4F16 = frem <4 x half> undef, undef
-  %V8F16 = frem <8 x half> undef, undef
-  %V16F16 = frem <16 x half> undef, undef
-  %V32F16 = frem <32 x half> undef, undef
-
-  %NXV1F16 = frem <vscale x 1 x half> undef, undef
-  %NXV2F16 = frem <vscale x 2 x half> undef, undef
-  %NXV4F16 = frem <vscale x 4 x half> undef, undef
-  %NXV8F16 = frem <vscale x 8 x half> undef, undef
-  %NXV16F16 = frem <vscale x 16 x half> undef, undef
-  %NXV32F16 = frem <vscale x 32 x half> undef, undef
-
   %V1F32 = frem <1 x float> undef, undef
   %V2F32 = frem <2 x float> undef, undef
   %V4F32 = frem <4 x float> undef, undef
@@ -400,26 +405,65 @@ define i32 @frem() {
   %NXV4F64 = frem <vscale x 4 x double> undef, undef
   %NXV8F64 = frem <vscale x 8 x double> undef, undef
 
-  ret i32 undef
+  ret void
 }
 
-define i32 @fneg() {
+define void @frem_f16() {
+; ZVFH-LABEL: 'frem_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2F16 = frem <2 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4F16 = frem <4 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8F16 = frem <8 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = frem <vscale x 2 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = frem <vscale x 4 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = frem <vscale x 8 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = frem <vscale x 16 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = frem <vscale x 32 x half> undef, undef
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'frem_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2F16 = frem <2 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4F16 = frem <4 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8F16 = frem <8 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = frem <vscale x 1 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = frem <vscale x 2 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = frem <vscale x 4 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = frem <vscale x 8 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV16F16 = frem <vscale x 16 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV32F16 = frem <vscale x 32 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %F16 = frem half undef, undef
+
+  %V1F16 = frem <1 x half> undef, undef
+  %V2F16 = frem <2 x half> undef, undef
+  %V4F16 = frem <4 x half> undef, undef
+  %V8F16 = frem <8 x half> undef, undef
+  %V16F16 = frem <16 x half> undef, undef
+  %V32F16 = frem <32 x half> undef, undef
+
+  %NXV1F16 = frem <vscale x 1 x half> undef, undef
+  %NXV2F16 = frem <vscale x 2 x half> undef, undef
+  %NXV4F16 = frem <vscale x 4 x half> undef, undef
+  %NXV8F16 = frem <vscale x 8 x half> undef, undef
+  %NXV16F16 = frem <vscale x 16 x half> undef, undef
+  %NXV32F16 = frem <vscale x 32 x half> undef, undef
+
+  ret void
+}
+
+define void @fneg() {
 ; CHECK-LABEL: 'fneg'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg <vscale x 32 x half> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fneg <1 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <2 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> undef
@@ -438,26 +482,11 @@ define i32 @fneg() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fneg <vscale x 2 x double> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fneg <vscale x 4 x double> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fneg <vscale x 8 x double> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = fneg half undef
   %F32 = fneg float undef
   %F64 = fneg double undef
 
-  %V1F16 = fneg <1 x half> undef
-  %V2F16 = fneg <2 x half> undef
-  %V4F16 = fneg <4 x half> undef
-  %V8F16 = fneg <8 x half> undef
-  %V16F16 = fneg <16 x half> undef
-  %V32F16 = fneg <32 x half> undef
-
-  %NXV1F16 = fneg <vscale x 1 x half> undef
-  %NXV2F16 = fneg <vscale x 2 x half> undef
-  %NXV4F16 = fneg <vscale x 4 x half> undef
-  %NXV8F16 = fneg <vscale x 8 x half> undef
-  %NXV16F16 = fneg <vscale x 16 x half> undef
-  %NXV32F16 = fneg <vscale x 32 x half> undef
-
   %V1F32 = fneg <1 x float> undef
   %V2F32 = fneg <2 x float> undef
   %V4F32 = fneg <4 x float> undef
@@ -480,26 +509,49 @@ define i32 @fneg() {
   %NXV4F64 = fneg <vscale x 4 x double> undef
   %NXV8F64 = fneg <vscale x 8 x double> undef
 
-  ret i32 undef
+  ret void
 }
 
-define i32 @fcopysign() {
+define void @fneg_f16() {
+; CHECK-LABEL: 'fneg_f16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg <vscale x 1 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg <vscale x 2 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg <vscale x 4 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg <vscale x 8 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg <vscale x 16 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg <vscale x 32 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %F16 = fneg half undef
+
+  %V1F16 = fneg <1 x half> undef
+  %V2F16 = fneg <2 x half> undef
+  %V4F16 = fneg <4 x half> undef
+  %V8F16 = fneg <8 x half> undef
+  %V16F16 = fneg <16 x half> undef
+  %V32F16 = fneg <32 x half> undef
+
+  %NXV1F16 = fneg <vscale x 1 x half> undef
+  %NXV2F16 = fneg <vscale x 2 x half> undef
+  %NXV4F16 = fneg <vscale x 4 x half> undef
+  %NXV8F16 = fneg <vscale x 8 x half> undef
+  %NXV16F16 = fneg <vscale x 16 x half> undef
+  %NXV32F16 = fneg <vscale x 32 x half> undef
+
+  ret void
+}
+
+define void @fcopysign() {
 ; CHECK-LABEL: 'fcopysign'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.copysign.v1f32(<1 x float> undef, <1 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
@@ -518,26 +570,11 @@ define i32 @fcopysign() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2F64 = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4F64 = call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = call half @llvm.copysign.f16(half undef, half undef)
   %F32 = call float @llvm.copysign.f32(float undef, float undef)
   %F64 = call double @llvm.copysign.f64(double undef, double undef)
 
-  %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef)
-  %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
-  %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
-  %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
-  %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
-  %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef)
-
-  %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
-  %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
-  %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
-  %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
-  %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
-  %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef)
-
   %V1F32 = call <1 x float> @llvm.copysign.v1f32(<1 x float> undef, <1 x float> undef)
   %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef)
   %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
@@ -560,26 +597,65 @@ define i32 @fcopysign() {
   %NXV4F64 = call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
   %NXV8F64 = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef)
 
-  ret i32 undef
+  ret void
+}
+
+define void @fcopysign_f16() {
+; ZVFH-LABEL: 'fcopysign_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fcopysign_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %F16 = call half @llvm.copysign.f16(half undef, half undef)
+
+  %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef)
+  %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+  %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+  %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+  %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+  %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef)
+
+  %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  %NXV2F16 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  %NXV4F16 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  %NXV8F16 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  %NXV16F16 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  %NXV32F16 = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef)
+
+  ret void
 }
 
-define i32 @fma() {
+define void @fma() {
 ; CHECK-LABEL: 'fma'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> undef, <1 x half> undef, <1 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> undef, <32 x half> undef, <32 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef, <vscale x 32 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.fma.v1f32(<1 x float> undef, <1 x float> undef, <1 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
@@ -598,26 +674,11 @@ define i32 @fma() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2F64 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = call <vscale x 8 x double> @llvm.fma.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
   %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
   %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
 
-  %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> undef, <1 x half> undef, <1 x half> undef)
-  %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
-  %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-  %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-  %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
-  %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> undef, <32 x half> undef, <32 x half> undef)
-
-  %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
-  %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
-  %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
-  %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-  %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
-  %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef, <vscale x 32 x half> undef)
-
   %V1F32 = call <1 x float> @llvm.fma.v1f32(<1 x float> undef, <1 x float> undef, <1 x float> undef)
   %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
   %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
@@ -640,50 +701,87 @@ define i32 @fma() {
   %NXV4F64 = call <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
   %NXV8F64 = call <vscale x 8 x double> @llvm.fma.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef)
 
-  ret i32 undef
+  ret void
+}
+
+define void @fma_f16() {
+; ZVFH-LABEL: 'fma_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> undef, <1 x half> undef, <1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> undef, <32 x half> undef, <32 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef, <vscale x 32 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fma_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> undef, <1 x half> undef, <1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> undef, <32 x half> undef, <32 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef, <vscale x 32 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
+
+  %V1F16 = call <1 x half> @llvm.fma.v1f16(<1 x half> undef, <1 x half> undef, <1 x half> undef)
+  %V2F16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
+  %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+  %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+  %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+  %V32F16 = call <32 x half> @llvm.fma.v32f16(<32 x half> undef, <32 x half> undef, <32 x half> undef)
+
+  %NXV1F16 = call <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  %NXV2F16 = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  %NXV4F16 = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  %NXV8F16 = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  %NXV16F16 = call <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  %NXV32F16 = call <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x half> undef, <vscale x 32 x half> undef)
+
+  ret void
 }
 
 define void @fmuladd() {
 ; CHECK-LABEL: 'fmuladd'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call float @llvm.fmuladd.f32(float undef, float undef, float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call double @llvm.fmuladd.f64(double undef, double undef, double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x double> @llvm.fmuladd.v16f64(<16 x double> undef, <16 x double> undef, <16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %29 = call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call float @llvm.fmuladd.f32(float undef, float undef, float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call double @llvm.fmuladd.f64(double undef, double undef, double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <16 x double> @llvm.fmuladd.v16f64(<16 x double> undef, <16 x double> undef, <16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  call half @llvm.fmuladd.f16(half undef, half undef, half undef)
   call float @llvm.fmuladd.f32(float undef, float undef, float undef)
   call double @llvm.fmuladd.f64(double undef, double undef, double undef)
-  call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
-  call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-  call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-  call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
   call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
   call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
   call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
@@ -692,11 +790,6 @@ define void @fmuladd() {
   call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
   call <8 x double> @llvm.fmuladd.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
   call <16 x double> @llvm.fmuladd.v16f64(<16 x double> undef, <16 x double> undef, <16 x double> undef)
-  call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
-  call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
   call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef)
   call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
   call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
@@ -710,113 +803,42 @@ define void @fmuladd() {
   ret void
 }
 
-declare half @llvm.copysign.f16(half, half)
-declare float @llvm.copysign.f32(float, float)
-declare double @llvm.copysign.f64(double, double)
-
-declare <1 x half> @llvm.copysign.v1f16(<1 x half>, <1 x half>)
-declare <2 x half> @llvm.copysign.v2f16(<2 x half>, <2 x half>)
-declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
-declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
-declare <16 x half> @llvm.copysign.v16f16(<16 x half>, <16 x half>)
-declare <32 x half> @llvm.copysign.v32f16(<32 x half>, <32 x half>)
-
-declare <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
-declare <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
-declare <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
-declare <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
-declare <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>)
-
-declare <1 x float> @llvm.copysign.v1f32(<1 x float>, <1 x float>)
-declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
-declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)
-declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
-
-declare <vscale x 1 x float> @llvm.copysign.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.copysign.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.copysign.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>)
-
-declare <1 x double> @llvm.copysign.v1f64(<1 x double>, <1 x double>)
-declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
-declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)
-declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>)
-
-declare <vscale x 1 x double> @llvm.copysign.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>)
-
-declare half @llvm.fma.f16(half, half, half)
-declare float @llvm.fma.f32(float, float, float)
-declare double @llvm.fma.f64(double, double, double)
-
-declare <1 x half> @llvm.fma.v1f16(<1 x half>, <1 x half>, <1 x half>)
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
-declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
-declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
-declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
-declare <32 x half> @llvm.fma.v32f16(<32 x half>, <32 x half>, <32 x half>)
-
-declare <vscale x 1 x half> @llvm.fma.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>)
-declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)
-declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
-declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 16 x half> @llvm.fma.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x half>)
-declare <vscale x 32 x half> @llvm.fma.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x half>)
-
-declare <1 x float> @llvm.fma.v1f32(<1 x float>, <1 x float>, <1 x float>)
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
-declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
-
-declare <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>)
-
-declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
-declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
-declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)
-
-declare <vscale x 1 x double> @llvm.fma.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.fma.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.fma.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>)
-
-declare half @llvm.fmuladd.f16(half, half, half)
-declare float @llvm.fmuladd.f32(float, float, float)
-declare double @llvm.fmuladd.f64(double, double, double)
-declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>)
-declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>)
-declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)
-declare <16 x half> @llvm.fmuladd.v16f16(<16 x half>, <16 x half>, <16 x half>)
-declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
-declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)
-declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>)
-declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
-declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)
-declare <8 x double> @llvm.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>)
-declare <16 x double> @llvm.fmuladd.v16f64(<16 x double>, <16 x double>, <16 x double>)
-declare <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>)
-declare <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)
-declare <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
-declare <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x half>)
-declare <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>)
-declare <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>)
-declare <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double>, <vscale x 16 x double>, <vscale x 16 x double>)
+define void @fmuladd_f16() {
+; ZVFH-LABEL: 'fmuladd_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fmuladd_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.fmuladd.f16(half undef, half undef, half undef)
+  call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
+  call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+  call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+  call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+  call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
index f8fe84cba3231c..64dc264f12735b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
@@ -1,48 +1,30 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-min=-1 | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 define void @fabs() {
 ; CHECK-LABEL: 'fabs'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call float @llvm.fabs.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = call double @llvm.fabs.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = call <16 x double> @llvm.fabs.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = call <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call float @llvm.fabs.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call double @llvm.fabs.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.fabs.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-
-  call half @llvm.fabs.f16(half undef)
-  call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
-  call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
-  call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
-  call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
-  call <vscale x 2 x half> @llvm.fabs.nvx2f16(<vscale x 2 x half> undef)
-  call <vscale x 4 x half> @llvm.fabs.nvx4f16(<vscale x 4 x half> undef)
-  call <vscale x 8 x half> @llvm.fabs.nvx8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.fabs.nvx16f16(<vscale x 16 x half> undef)
   call float @llvm.fabs.f32(float undef)
   call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
   call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
@@ -65,6 +47,31 @@ define void @fabs() {
   ret void
 }
 
+define void @fabs_f16() {
+; CHECK-LABEL: 'fabs_f16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.fabs.f16(half undef)
+  call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
+  call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
+  call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
+  call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
+  call <vscale x 2 x half> @llvm.fabs.nvx2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.fabs.nvx4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.fabs.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.fabs.nvx16f16(<vscale x 16 x half> undef)
+  ret void
+}
+
 define void @minnum() {
 ; CHECK-LABEL: 'minnum'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call float @llvm.minnum.f32(float undef, float undef)
@@ -110,6 +117,46 @@ define void @minnum() {
   ret void
 }
 
+define void @minnum_f16() {
+; ZVFH-LABEL: 'minnum_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.minnum.f16(half undef, half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'minnum_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.minnum.f16(half undef, half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.minnum.f16(half undef, half undef)
+  call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef)
+  call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef)
+  call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef)
+  call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef)
+  call <vscale x 1 x half> @llvm.minnum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.minnum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.minnum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.minnum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.minnum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  ret void
+}
+
 define void @maxnum() {
 ; CHECK-LABEL: 'maxnum'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call float @llvm.maxnum.f32(float undef, float undef)
@@ -155,6 +202,46 @@ define void @maxnum() {
   ret void
 }
 
+define void @maxnum_f16() {
+; ZVFH-LABEL: 'maxnum_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.maxnum.f16(half undef, half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'maxnum_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.maxnum.f16(half undef, half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.maxnum.f16(half undef, half undef)
+  call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef)
+  call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef)
+  call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef)
+  call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef)
+  call <vscale x 1 x half> @llvm.maxnum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.maxnum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.maxnum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.maxnum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.maxnum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  ret void
+}
+
 define void @minimum() {
 ; CHECK-LABEL: 'minimum'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call float @llvm.minimum.f32(float undef, float undef)
@@ -200,6 +287,46 @@ define void @minimum() {
   ret void
 }
 
+define void @minimum_f16() {
+; ZVFH-LABEL: 'minimum_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call half @llvm.minimum.f16(half undef, half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'minimum_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call half @llvm.minimum.f16(half undef, half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.minimum.f16(half undef, half undef)
+  call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
+  call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
+  call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
+  call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
+  call <vscale x 1 x half> @llvm.minimum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.minimum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.minimum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.minimum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.minimum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  ret void
+}
+
 define void @maximum() {
 ; CHECK-LABEL: 'maximum'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call float @llvm.maximum.f32(float undef, float undef)
@@ -245,6 +372,46 @@ define void @maximum() {
   ret void
 }
 
+define void @maximum_f16() {
+; ZVFH-LABEL: 'maximum_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call half @llvm.maximum.f16(half undef, half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'maximum_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call half @llvm.maximum.f16(half undef, half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.maximum.f16(half undef, half undef)
+  call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
+  call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
+  call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
+  call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
+  call <vscale x 1 x half> @llvm.maximum.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.maximum.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.maximum.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.maximum.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.maximum.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  ret void
+}
+
 define void @copysign() {
 ; CHECK-LABEL: 'copysign'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call float @llvm.copysign.f32(float undef, float undef)
@@ -290,131 +457,42 @@ define void @copysign() {
   ret void
 }
 
-declare half @llvm.fabs.f16(half)
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
-declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
-declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
-declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
-declare <vscale x 2 x half> @llvm.fabs.nvx2f16(<vscale x 2 x half>)
-declare <vscale x 4 x half> @llvm.fabs.nvx4f16(<vscale x 4 x half>)
-declare <vscale x 8 x half> @llvm.fabs.nvx8f16(<vscale x 8 x half>)
-declare <vscale x 16 x half> @llvm.fabs.nvx16f16(<vscale x 16 x half>)
-declare float @llvm.fabs.f32(float)
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
-declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
-declare <16 x float> @llvm.fabs.v16f32(<16 x float>)
-declare <vscale x 1 x float> @llvm.fabs.nvx1f32(<vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.fabs.nvx2f32(<vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.fabs.nvx4f32(<vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.fabs.nvx8f32(<vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.fabs.nvx16f32(<vscale x 16 x float>)
-declare double @llvm.fabs.f64(double)
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
-declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
-declare <8 x double> @llvm.fabs.v8f64(<8 x double>)
-declare <16 x double> @llvm.fabs.v16f64(<16 x double>)
-declare <vscale x 1 x double> @llvm.fabs.nvx1f64(<vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.fabs.nvx2f64(<vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.fabs.nvx4f64(<vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.fabs.nvx8f64(<vscale x 8 x double>)
-
-declare float @llvm.minnum.f32(float, float)
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
-declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>)
-declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>)
-declare <vscale x 1 x float> @llvm.minnum.nvx1f32(<vscale x 1 x float>, <vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.minnum.nvx2f32(<vscale x 2 x float>, <vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.minnum.nvx4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.minnum.nvx8f32(<vscale x 8 x float>, <vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.minnum.nvx16f32(<vscale x 16 x float>, <vscale x 16 x float>)
-declare double @llvm.minnum.f64(double, double)
-declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
-declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
-declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)
-declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>)
-declare <vscale x 1 x double> @llvm.minnum.nvx1f64(<vscale x 1 x double>, <vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.minnum.nvx2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.minnum.nvx4f64(<vscale x 4 x double>, <vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.minnum.nvx8f64(<vscale x 8 x double>, <vscale x 8 x double>)
-
-declare float @llvm.maxnum.f32(float, float)
-declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
-declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>)
-declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>)
-declare <vscale x 1 x float> @llvm.maxnum.nvx1f32(<vscale x 1 x float>, <vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.maxnum.nvx2f32(<vscale x 2 x float>, <vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.maxnum.nvx4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.maxnum.nvx8f32(<vscale x 8 x float>, <vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.maxnum.nvx16f32(<vscale x 16 x float>, <vscale x 16 x float>)
-declare double @llvm.maxnum.f64(double, double)
-declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
-declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
-declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)
-declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>)
-declare <vscale x 1 x double> @llvm.maxnum.nvx1f64(<vscale x 1 x double>, <vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.maxnum.nvx2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.maxnum.nvx4f64(<vscale x 4 x double>, <vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.maxnum.nvx8f64(<vscale x 8 x double>, <vscale x 8 x double>)
-
-declare float @llvm.minimum.f32(float, float)
-declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
-declare <8 x float> @llvm.minimum.v8f32(<8 x float>, <8 x float>)
-declare <16 x float> @llvm.minimum.v16f32(<16 x float>, <16 x float>)
-declare <vscale x 1 x float> @llvm.minimum.nvx1f32(<vscale x 1 x float>, <vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.minimum.nvx2f32(<vscale x 2 x float>, <vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.minimum.nvx4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.minimum.nvx8f32(<vscale x 8 x float>, <vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.minimum.nvx16f32(<vscale x 16 x float>, <vscale x 16 x float>)
-declare double @llvm.minimum.f64(double, double)
-declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
-declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>)
-declare <8 x double> @llvm.minimum.v8f64(<8 x double>, <8 x double>)
-declare <16 x double> @llvm.minimum.v16f64(<16 x double>, <16 x double>)
-declare <vscale x 1 x double> @llvm.minimum.nvx1f64(<vscale x 1 x double>, <vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.minimum.nvx2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.minimum.nvx4f64(<vscale x 4 x double>, <vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.minimum.nvx8f64(<vscale x 8 x double>, <vscale x 8 x double>)
-
-declare float @llvm.maximum.f32(float, float)
-declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
-declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>)
-declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
-declare <vscale x 1 x float> @llvm.maximum.nvx1f32(<vscale x 1 x float>, <vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.maximum.nvx2f32(<vscale x 2 x float>, <vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.maximum.nvx4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.maximum.nvx8f32(<vscale x 8 x float>, <vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.maximum.nvx16f32(<vscale x 16 x float>, <vscale x 16 x float>)
-declare double @llvm.maximum.f64(double, double)
-declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
-declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)
-declare <8 x double> @llvm.maximum.v8f64(<8 x double>, <8 x double>)
-declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>)
-declare <vscale x 1 x double> @llvm.maximum.nvx1f64(<vscale x 1 x double>, <vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.maximum.nvx2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.maximum.nvx4f64(<vscale x 4 x double>, <vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.maximum.nvx8f64(<vscale x 8 x double>, <vscale x 8 x double>)
-
-declare float @llvm.copysign.f32(float, float)
-declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
-declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)
-declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
-declare <vscale x 1 x float> @llvm.copysign.nvx1f32(<vscale x 1 x float>, <vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.copysign.nvx2f32(<vscale x 2 x float>, <vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.copysign.nvx4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.copysign.nvx8f32(<vscale x 8 x float>, <vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.copysign.nvx16f32(<vscale x 16 x float>, <vscale x 16 x float>)
-declare double @llvm.copysign.f64(double, double)
-declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
-declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)
-declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>)
-declare <16 x double> @llvm.copysign.v16f64(<16 x double>, <16 x double>)
-declare <vscale x 1 x double> @llvm.copysign.nvx1f64(<vscale x 1 x double>, <vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.copysign.nvx2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.copysign.nvx4f64(<vscale x 4 x double>, <vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.copysign.nvx8f64(<vscale x 8 x double>, <vscale x 8 x double>)
+define void @copysign_f16() {
+; ZVFH-LABEL: 'copysign_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.copysign.f16(half undef, half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'copysign_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.copysign.f16(half undef, half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %3 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %4 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %5 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.copysign.f16(half undef, half undef)
+  call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
+  call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
+  call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
+  call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
+  call <vscale x 1 x half> @llvm.copysign.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.copysign.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.copysign.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.copysign.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.copysign.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
index 71dd64d7651280..dc501b82417d3d 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fround.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d -riscv-v-vector-bits-min=-1 | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvfhmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 define void @floor() {
 ; CHECK-LABEL: 'floor'
@@ -46,6 +47,46 @@ define void @floor() {
   ret void
 }
 
+define void @floor_fp16() {
+; ZVFH-LABEL: 'floor_fp16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.floor.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.floor.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.floor.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.floor.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.floor.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'floor_fp16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call half @llvm.floor.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.floor.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.floor.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.floor.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.floor.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.floor.f16(half undef)
+  call <2 x half> @llvm.floor.v2f16(<2 x half> undef)
+  call <4 x half> @llvm.floor.v4f16(<4 x half> undef)
+  call <8 x half> @llvm.floor.v8f16(<8 x half> undef)
+  call <16 x half> @llvm.floor.v16f16(<16 x half> undef)
+  call <vscale x 1 x half> @llvm.floor.nvx1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.floor.nvx2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.floor.nvx4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.floor.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.floor.nvx16f16(<vscale x 16 x half> undef)
+  ret void
+}
+
 define void @ceil() {
 ; CHECK-LABEL: 'ceil'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.ceil.f32(float undef)
@@ -91,6 +132,46 @@ define void @ceil() {
   ret void
 }
 
+define void @ceil_fp16() {
+; ZVFH-LABEL: 'ceil_fp16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.ceil.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.ceil.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.ceil.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.ceil.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.ceil.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'ceil_fp16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call half @llvm.ceil.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.ceil.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.ceil.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.ceil.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.ceil.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.ceil.f16(half undef)
+  call <2 x half> @llvm.ceil.v2f16(<2 x half> undef)
+  call <4 x half> @llvm.ceil.v4f16(<4 x half> undef)
+  call <8 x half> @llvm.ceil.v8f16(<8 x half> undef)
+  call <16 x half> @llvm.ceil.v16f16(<16 x half> undef)
+  call <vscale x 1 x half> @llvm.ceil.nvx1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.ceil.nvx2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.ceil.nvx4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.ceil.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.ceil.nvx16f16(<vscale x 16 x half> undef)
+  ret void
+}
+
 define void @trunc() {
 ; CHECK-LABEL: 'trunc'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.trunc.f32(float undef)
@@ -136,6 +217,46 @@ define void @trunc() {
   ret void
 }
 
+define void @trunc_fp16() {
+; ZVFH-LABEL: 'trunc_fp16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.trunc.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.trunc.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.trunc.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.trunc.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.trunc.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.trunc.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x half> @llvm.trunc.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'trunc_fp16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call half @llvm.trunc.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %2 = call <2 x half> @llvm.trunc.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %3 = call <4 x half> @llvm.trunc.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %4 = call <8 x half> @llvm.trunc.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 159 for instruction: %5 = call <16 x half> @llvm.trunc.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.trunc.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.trunc.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.trunc.f16(half undef)
+  call <2 x half> @llvm.trunc.v2f16(<2 x half> undef)
+  call <4 x half> @llvm.trunc.v4f16(<4 x half> undef)
+  call <8 x half> @llvm.trunc.v8f16(<8 x half> undef)
+  call <16 x half> @llvm.trunc.v16f16(<16 x half> undef)
+  call <vscale x 1 x half> @llvm.trunc.nvx1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.trunc.nvx2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.trunc.nvx4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.trunc.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.trunc.nvx16f16(<vscale x 16 x half> undef)
+  ret void
+}
+
 define void @rint() {
 ; CHECK-LABEL: 'rint'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.rint.f32(float undef)
@@ -181,6 +302,46 @@ define void @rint() {
   ret void
 }
 
+define void @rint_fp16() {
+; ZVFH-LABEL: 'rint_fp16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.rint.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.rint.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.rint.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.rint.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.rint.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 2 x half> @llvm.rint.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.rint.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x half> @llvm.rint.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'rint_fp16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call half @llvm.rint.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.rint.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.rint.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.rint.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.rint.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.rint.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.rint.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.rint.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.rint.f16(half undef)
+  call <2 x half> @llvm.rint.v2f16(<2 x half> undef)
+  call <4 x half> @llvm.rint.v4f16(<4 x half> undef)
+  call <8 x half> @llvm.rint.v8f16(<8 x half> undef)
+  call <16 x half> @llvm.rint.v16f16(<16 x half> undef)
+  call <vscale x 1 x half> @llvm.rint.nvx1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.rint.nvx2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.rint.nvx4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.rint.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.rint.nvx16f16(<vscale x 16 x half> undef)
+  ret void
+}
+
 define void @lrint() {
 ; CHECK-LABEL: 'lrint'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.f32(float undef)
@@ -226,6 +387,33 @@ define void @lrint() {
   ret void
 }
 
+define void @lrint_fp16() {
+; CHECK-LABEL: 'lrint_fp16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f16(<vscale x 1 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f16(<vscale x 2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f16(<vscale x 4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f16(<vscale x 8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f16(<vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call i64 @llvm.lrint.f16(half undef)
+  call <2 x i64> @llvm.lrint.v2f16(<2 x half> undef)
+  call <4 x i64> @llvm.lrint.v4f16(<4 x half> undef)
+  call <8 x i64> @llvm.lrint.v8f16(<8 x half> undef)
+  call <16 x i64> @llvm.lrint.v16f16(<16 x half> undef)
+  call <vscale x 1 x i64> @llvm.lrint.nvx1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x i64> @llvm.lrint.nvx2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x i64> @llvm.lrint.nvx4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x i64> @llvm.lrint.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x i64> @llvm.lrint.nvx16f16(<vscale x 16 x half> undef)
+  ret void
+}
+
 define void @llrint() {
 ; CHECK-LABEL: 'llrint'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.f32(float undef)
@@ -271,6 +459,33 @@ define void @llrint() {
   ret void
 }
 
+define void @llrint_fp16() {
+; CHECK-LABEL: 'llrint_fp16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.f16(half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f16(<vscale x 1 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f16(<vscale x 2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f16(<vscale x 4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f16(<vscale x 8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f16(<vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call i64 @llvm.llrint.f16(half undef)
+  call <2 x i64> @llvm.llrint.v2f16(<2 x half> undef)
+  call <4 x i64> @llvm.llrint.v4f16(<4 x half> undef)
+  call <8 x i64> @llvm.llrint.v8f16(<8 x half> undef)
+  call <16 x i64> @llvm.llrint.v16f16(<16 x half> undef)
+  call <vscale x 1 x i64> @llvm.llrint.nvx1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x i64> @llvm.llrint.nvx2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x i64> @llvm.llrint.nvx4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x i64> @llvm.llrint.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x i64> @llvm.llrint.nvx16f16(<vscale x 16 x half> undef)
+  ret void
+}
+
 define void @nearbyint() {
 ; CHECK-LABEL: 'nearbyint'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.nearbyint.f32(float undef)
@@ -316,6 +531,46 @@ define void @nearbyint() {
   ret void
 }
 
+define void @nearbyint_fp16() {
+; ZVFH-LABEL: 'nearbyint_fp16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.nearbyint.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.nearbyint.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'nearbyint_fp16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call half @llvm.nearbyint.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.nearbyint.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.nearbyint.f16(half undef)
+  call <2 x half> @llvm.nearbyint.v2f16(<2 x half> undef)
+  call <4 x half> @llvm.nearbyint.v4f16(<4 x half> undef)
+  call <8 x half> @llvm.nearbyint.v8f16(<8 x half> undef)
+  call <16 x half> @llvm.nearbyint.v16f16(<16 x half> undef)
+  call <vscale x 1 x half> @llvm.nearbyint.nvx1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.nearbyint.nvx2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.nearbyint.nvx4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.nearbyint.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.nearbyint.nvx16f16(<vscale x 16 x half> undef)
+  ret void
+}
+
 define void @round() {
 ; CHECK-LABEL: 'round'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.round.f32(float undef)
@@ -361,6 +616,46 @@ define void @round() {
   ret void
 }
 
+define void @round_fp16() {
+; ZVFH-LABEL: 'round_fp16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.round.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.round.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.round.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.round.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.round.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'round_fp16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call half @llvm.round.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.round.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.round.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.round.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.round.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.round.f16(half undef)
+  call <2 x half> @llvm.round.v2f16(<2 x half> undef)
+  call <4 x half> @llvm.round.v4f16(<4 x half> undef)
+  call <8 x half> @llvm.round.v8f16(<8 x half> undef)
+  call <16 x half> @llvm.round.v16f16(<16 x half> undef)
+  call <vscale x 1 x half> @llvm.round.nvx1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.round.nvx2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.round.nvx4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.round.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.round.nvx16f16(<vscale x 16 x half> undef)
+  ret void
+}
+
 define void @roundeven() {
 ; CHECK-LABEL: 'roundeven'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.roundeven.f32(float undef)
@@ -406,6 +701,46 @@ define void @roundeven() {
   ret void
 }
 
+define void @roundeven_fp16() {
+; ZVFH-LABEL: 'roundeven_fp16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.roundeven.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.roundeven.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.roundeven.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'roundeven_fp16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call half @llvm.roundeven.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.roundeven.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.roundeven.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.roundeven.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.roundeven.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call half @llvm.roundeven.f16(half undef)
+  call <2 x half> @llvm.roundeven.v2f16(<2 x half> undef)
+  call <4 x half> @llvm.roundeven.v4f16(<4 x half> undef)
+  call <8 x half> @llvm.roundeven.v8f16(<8 x half> undef)
+  call <16 x half> @llvm.roundeven.v16f16(<16 x half> undef)
+  call <vscale x 1 x half> @llvm.roundeven.nvx1f16(<vscale x 1 x half> undef)
+  call <vscale x 2 x half> @llvm.roundeven.nvx2f16(<vscale x 2 x half> undef)
+  call <vscale x 4 x half> @llvm.roundeven.nvx4f16(<vscale x 4 x half> undef)
+  call <vscale x 8 x half> @llvm.roundeven.nvx8f16(<vscale x 8 x half> undef)
+  call <vscale x 16 x half> @llvm.roundeven.nvx16f16(<vscale x 16 x half> undef)
+  ret void
+}
+
 define void @vp_ceil() {
 ; CHECK-LABEL: 'vp_ceil'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
@@ -447,6 +782,43 @@ define void @vp_ceil() {
   ret void
 }
 
+define void @vp_ceil_f16() {
+; ZVFH-LABEL: 'vp_ceil_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'vp_ceil_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+  call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+  call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+  call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.ceil.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.ceil.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.ceil.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.ceil.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.ceil.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  ret void
+}
+
 define void @vp_floor() {
 ; CHECK-LABEL: 'vp_floor'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
@@ -488,6 +860,43 @@ define void @vp_floor() {
   ret void
 }
 
+define void @vp_floor_f16() {
+; ZVFH-LABEL: 'vp_floor_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'vp_floor_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call <2 x half> @llvm.vp.floor.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+  call <4 x half> @llvm.vp.floor.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+  call <8 x half> @llvm.vp.floor.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+  call <16 x half> @llvm.vp.floor.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.floor.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.floor.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.floor.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.floor.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.floor.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  ret void
+}
+
 define void @vp_round() {
 ; CHECK-LABEL: 'vp_round'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
@@ -529,6 +938,43 @@ define void @vp_round() {
   ret void
 }
 
+define void @vp_round_f16() {
+; ZVFH-LABEL: 'vp_round_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.round.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.round.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.round.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.round.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'vp_round_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x half> @llvm.vp.round.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x half> @llvm.vp.round.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x half> @llvm.vp.round.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x half> @llvm.vp.round.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call <2 x half> @llvm.vp.round.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+  call <4 x half> @llvm.vp.round.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+  call <8 x half> @llvm.vp.round.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+  call <16 x half> @llvm.vp.round.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.round.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.round.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.round.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.round.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.round.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  ret void
+}
+
 define void @vp_roundeven() {
 ; CHECK-LABEL: 'vp_roundeven'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
@@ -570,6 +1016,43 @@ define void @vp_roundeven() {
   ret void
 }
 
+define void @vp_roundeven_f16() {
+; ZVFH-LABEL: 'vp_roundeven_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'vp_roundeven_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+  call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+  call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+  call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.roundeven.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.roundeven.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.roundeven.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.roundeven.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.roundeven.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  ret void
+}
+
 define void @vp_roundtozero() {
 ; CHECK-LABEL: 'vp_roundtozero'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
@@ -611,6 +1094,43 @@ define void @vp_roundtozero() {
   ret void
 }
 
+define void @vp_roundtozero_f16() {
+; ZVFH-LABEL: 'vp_roundtozero_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'vp_roundtozero_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %1 = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %3 = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %4 = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+  call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+  call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+  call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.roundtozero.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.roundtozero.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.roundtozero.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.roundtozero.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.roundtozero.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  ret void
+}
+
 define void @vp_rint() {
 ; CHECK-LABEL: 'vp_rint'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %1 = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
@@ -652,6 +1172,43 @@ define void @vp_rint() {
   ret void
 }
 
+define void @vp_rint_f16() {
+; ZVFH-LABEL: 'vp_rint_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %1 = call <2 x half> @llvm.vp.rint.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = call <4 x half> @llvm.vp.rint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = call <8 x half> @llvm.vp.rint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'vp_rint_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x half> @llvm.vp.rint.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x half> @llvm.vp.rint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x half> @llvm.vp.rint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call <2 x half> @llvm.vp.rint.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+  call <4 x half> @llvm.vp.rint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+  call <8 x half> @llvm.vp.rint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+  call <16 x half> @llvm.vp.rint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.rint.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.rint.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.rint.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.rint.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.rint.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  ret void
+}
+
 define void @vp_nearbyint() {
 ; CHECK-LABEL: 'vp_nearbyint'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
@@ -693,315 +1250,39 @@ define void @vp_nearbyint() {
   ret void
 }
 
-declare float @llvm.floor.f32(float)
-declare <2 x float> @llvm.floor.v2f32(<2 x float>)
-declare <4 x float> @llvm.floor.v4f32(<4 x float>)
-declare <8 x float> @llvm.floor.v8f32(<8 x float>)
-declare <16 x float> @llvm.floor.v16f32(<16 x float>)
-declare <vscale x 1 x float> @llvm.floor.nvx1f32(<vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.floor.nvx2f32(<vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.floor.nvx4f32(<vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.floor.nvx8f32(<vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.floor.nvx16f32(<vscale x 16 x float>)
-declare double @llvm.floor.f64(double)
-declare <2 x double> @llvm.floor.v2f64(<2 x double>)
-declare <4 x double> @llvm.floor.v4f64(<4 x double>)
-declare <8 x double> @llvm.floor.v8f64(<8 x double>)
-declare <16 x double> @llvm.floor.v16f64(<16 x double>)
-declare <vscale x 1 x double> @llvm.floor.nvx1f64(<vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.floor.nvx2f64(<vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.floor.nvx4f64(<vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.floor.nvx8f64(<vscale x 8 x double>)
-
-declare float @llvm.ceil.f32(float)
-declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
-declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
-declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
-declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
-declare <vscale x 1 x float> @llvm.ceil.nvx1f32(<vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.ceil.nvx2f32(<vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.ceil.nvx4f32(<vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.ceil.nvx8f32(<vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.ceil.nvx16f32(<vscale x 16 x float>)
-declare double @llvm.ceil.f64(double)
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
-declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
-declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
-declare <16 x double> @llvm.ceil.v16f64(<16 x double>)
-declare <vscale x 1 x double> @llvm.ceil.nvx1f64(<vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.ceil.nvx2f64(<vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.ceil.nvx4f64(<vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.ceil.nvx8f64(<vscale x 8 x double>)
-
-declare float @llvm.trunc.f32(float)
-declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
-declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
-declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
-declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
-declare <vscale x 1 x float> @llvm.trunc.nvx1f32(<vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.trunc.nvx2f32(<vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.trunc.nvx4f32(<vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.trunc.nvx8f32(<vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.trunc.nvx16f32(<vscale x 16 x float>)
-declare double @llvm.trunc.f64(double)
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
-declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
-declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
-declare <16 x double> @llvm.trunc.v16f64(<16 x double>)
-declare <vscale x 1 x double> @llvm.trunc.nvx1f64(<vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.trunc.nvx2f64(<vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.trunc.nvx4f64(<vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.trunc.nvx8f64(<vscale x 8 x double>)
-
-declare float @llvm.rint.f32(float)
-declare <2 x float> @llvm.rint.v2f32(<2 x float>)
-declare <4 x float> @llvm.rint.v4f32(<4 x float>)
-declare <8 x float> @llvm.rint.v8f32(<8 x float>)
-declare <16 x float> @llvm.rint.v16f32(<16 x float>)
-declare <vscale x 1 x float> @llvm.rint.nvx1f32(<vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.rint.nvx2f32(<vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.rint.nvx4f32(<vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.rint.nvx8f32(<vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.rint.nvx16f32(<vscale x 16 x float>)
-declare double @llvm.rint.f64(double)
-declare <2 x double> @llvm.rint.v2f64(<2 x double>)
-declare <4 x double> @llvm.rint.v4f64(<4 x double>)
-declare <8 x double> @llvm.rint.v8f64(<8 x double>)
-declare <16 x double> @llvm.rint.v16f64(<16 x double>)
-declare <vscale x 1 x double> @llvm.rint.nvx1f64(<vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.rint.nvx2f64(<vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.rint.nvx4f64(<vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.rint.nvx8f64(<vscale x 8 x double>)
-
-declare i64 @llvm.lrint.i64.f32(float)
-declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>)
-declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>)
-declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>)
-declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>)
-declare <vscale x 1 x i64> @llvm.lrint.nvx1i64.nvx1f32(<vscale x 1 x float>)
-declare <vscale x 2 x i64> @llvm.lrint.nvx2i64.nvx2f32(<vscale x 2 x float>)
-declare <vscale x 4 x i64> @llvm.lrint.nvx4i64.nvx4f32(<vscale x 4 x float>)
-declare <vscale x 8 x i64> @llvm.lrint.nvx8i64.nvx8f32(<vscale x 8 x float>)
-declare <vscale x 16 x i64> @llvm.lrint.nvx16i64.nvx16f32(<vscale x 16 x float>)
-declare i64 @llvm.lrint.i64.f64(double)
-declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>)
-declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>)
-declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>)
-declare <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double>)
-declare <vscale x 1 x i64> @llvm.lrint.nvx1i64.nvx1f64(<vscale x 1 x double>)
-declare <vscale x 2 x i64> @llvm.lrint.nvx2i64.nvx2f64(<vscale x 2 x double>)
-declare <vscale x 4 x i64> @llvm.lrint.nvx4i64.nvx4f64(<vscale x 4 x double>)
-declare <vscale x 8 x i64> @llvm.lrint.nvx8i64.nvx8f64(<vscale x 8 x double>)
-
-declare i64 @llvm.llrint.i64.f32(float)
-declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
-declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
-declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
-declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
-declare <vscale x 1 x i64> @llvm.llrint.nvx1i64.nvx1f32(<vscale x 1 x float>)
-declare <vscale x 2 x i64> @llvm.llrint.nvx2i64.nvx2f32(<vscale x 2 x float>)
-declare <vscale x 4 x i64> @llvm.llrint.nvx4i64.nvx4f32(<vscale x 4 x float>)
-declare <vscale x 8 x i64> @llvm.llrint.nvx8i64.nvx8f32(<vscale x 8 x float>)
-declare <vscale x 16 x i64> @llvm.llrint.nvx16i64.nvx16f32(<vscale x 16 x float>)
-declare i64 @llvm.llrint.i64.f64(double)
-declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
-declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
-declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
-declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
-declare <vscale x 1 x i64> @llvm.llrint.nvx1i64.nvx1f64(<vscale x 1 x double>)
-declare <vscale x 2 x i64> @llvm.llrint.nvx2i64.nvx2f64(<vscale x 2 x double>)
-declare <vscale x 4 x i64> @llvm.llrint.nvx4i64.nvx4f64(<vscale x 4 x double>)
-declare <vscale x 8 x i64> @llvm.llrint.nvx8i64.nvx8f64(<vscale x 8 x double>)
-
-declare float @llvm.nearbyint.f32(float)
-declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
-declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
-declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
-declare <vscale x 1 x float> @llvm.nearbyint.nvx1f32(<vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.nearbyint.nvx2f32(<vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.nearbyint.nvx4f32(<vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.nearbyint.nvx8f32(<vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.nearbyint.nvx16f32(<vscale x 16 x float>)
-declare double @llvm.nearbyint.f64(double)
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
-declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
-declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
-declare <16 x double> @llvm.nearbyint.v16f64(<16 x double>)
-declare <vscale x 1 x double> @llvm.nearbyint.nvx1f64(<vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.nearbyint.nvx2f64(<vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.nearbyint.nvx4f64(<vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.nearbyint.nvx8f64(<vscale x 8 x double>)
-
-declare float @llvm.round.f32(float)
-declare <2 x float> @llvm.round.v2f32(<2 x float>)
-declare <4 x float> @llvm.round.v4f32(<4 x float>)
-declare <8 x float> @llvm.round.v8f32(<8 x float>)
-declare <16 x float> @llvm.round.v16f32(<16 x float>)
-declare <vscale x 1 x float> @llvm.round.nvx1f32(<vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.round.nvx2f32(<vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.round.nvx4f32(<vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.round.nvx8f32(<vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.round.nvx16f32(<vscale x 16 x float>)
-declare double @llvm.round.f64(double)
-declare <2 x double> @llvm.round.v2f64(<2 x double>)
-declare <4 x double> @llvm.round.v4f64(<4 x double>)
-declare <8 x double> @llvm.round.v8f64(<8 x double>)
-declare <16 x double> @llvm.round.v16f64(<16 x double>)
-declare <vscale x 1 x double> @llvm.round.nvx1f64(<vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.round.nvx2f64(<vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.round.nvx4f64(<vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.round.nvx8f64(<vscale x 8 x double>)
-
-declare float @llvm.roundeven.f32(float)
-declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
-declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
-declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
-declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
-declare <vscale x 1 x float> @llvm.roundeven.nvx1f32(<vscale x 1 x float>)
-declare <vscale x 2 x float> @llvm.roundeven.nvx2f32(<vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.roundeven.nvx4f32(<vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.roundeven.nvx8f32(<vscale x 8 x float>)
-declare <vscale x 16 x float> @llvm.roundeven.nvx16f32(<vscale x 16 x float>)
-declare double @llvm.roundeven.f64(double)
-declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
-declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
-declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
-declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
-declare <vscale x 1 x double> @llvm.roundeven.nvx1f64(<vscale x 1 x double>)
-declare <vscale x 2 x double> @llvm.roundeven.nvx2f64(<vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.roundeven.nvx4f64(<vscale x 4 x double>)
-declare <vscale x 8 x double> @llvm.roundeven.nvx8f64(<vscale x 8 x double>)
-
-declare <2 x float> @llvm.vp.ceil.v2f32(<2 x float>, <2 x i1>, i32)
-declare <4 x float> @llvm.vp.ceil.v4f32(<4 x float>, <4 x i1>, i32)
-declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, i32)
-declare <16 x float> @llvm.vp.ceil.v16f32(<16 x float>, <16 x i1>, i32)
-declare <vscale x 1 x float> @llvm.vp.ceil.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x float> @llvm.vp.ceil.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x float> @llvm.vp.ceil.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x float> @llvm.vp.ceil.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x float> @llvm.vp.ceil.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
-declare double @llvm.vp.ceil.f64(double)
-declare <2 x double> @llvm.vp.ceil.v2f64(<2 x double>, <2 x i1>, i32)
-declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32)
-declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32)
-declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32)
-declare <vscale x 1 x double> @llvm.vp.ceil.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x double> @llvm.vp.ceil.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x double> @llvm.vp.ceil.nvx4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x double> @llvm.vp.ceil.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
-
-declare <2 x float> @llvm.vp.floor.v2f32(<2 x float>, <2 x i1>, i32)
-declare <4 x float> @llvm.vp.floor.v4f32(<4 x float>, <4 x i1>, i32)
-declare <8 x float> @llvm.vp.floor.v8f32(<8 x float>, <8 x i1>, i32)
-declare <16 x float> @llvm.vp.floor.v16f32(<16 x float>, <16 x i1>, i32)
-declare <vscale x 1 x float> @llvm.vp.floor.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x float> @llvm.vp.floor.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x float> @llvm.vp.floor.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x float> @llvm.vp.floor.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x float> @llvm.vp.floor.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
-declare double @llvm.vp.floor.f64(double)
-declare <2 x double> @llvm.vp.floor.v2f64(<2 x double>, <2 x i1>, i32)
-declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32)
-declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32)
-declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32)
-declare <vscale x 1 x double> @llvm.vp.floor.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x double> @llvm.vp.floor.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x double> @llvm.vp.floor.nvx4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x double> @llvm.vp.floor.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
-
-declare <2 x float> @llvm.vp.round.v2f32(<2 x float>, <2 x i1>, i32)
-declare <4 x float> @llvm.vp.round.v4f32(<4 x float>, <4 x i1>, i32)
-declare <8 x float> @llvm.vp.round.v8f32(<8 x float>, <8 x i1>, i32)
-declare <16 x float> @llvm.vp.round.v16f32(<16 x float>, <16 x i1>, i32)
-declare <vscale x 1 x float> @llvm.vp.round.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x float> @llvm.vp.round.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x float> @llvm.vp.round.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x float> @llvm.vp.round.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x float> @llvm.vp.round.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
-declare double @llvm.vp.round.f64(double)
-declare <2 x double> @llvm.vp.round.v2f64(<2 x double>, <2 x i1>, i32)
-declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32)
-declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32)
-declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32)
-declare <vscale x 1 x double> @llvm.vp.round.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x double> @llvm.vp.round.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x double> @llvm.vp.round.nvx4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x double> @llvm.vp.round.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
-
-declare <2 x float> @llvm.vp.roundeven.v2f32(<2 x float>, <2 x i1>, i32)
-declare <4 x float> @llvm.vp.roundeven.v4f32(<4 x float>, <4 x i1>, i32)
-declare <8 x float> @llvm.vp.roundeven.v8f32(<8 x float>, <8 x i1>, i32)
-declare <16 x float> @llvm.vp.roundeven.v16f32(<16 x float>, <16 x i1>, i32)
-declare <vscale x 1 x float> @llvm.vp.roundeven.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x float> @llvm.vp.roundeven.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x float> @llvm.vp.roundeven.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x float> @llvm.vp.roundeven.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x float> @llvm.vp.roundeven.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
-declare double @llvm.vp.roundeven.f64(double)
-declare <2 x double> @llvm.vp.roundeven.v2f64(<2 x double>, <2 x i1>, i32)
-declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32)
-declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32)
-declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32)
-declare <vscale x 1 x double> @llvm.vp.roundeven.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x double> @llvm.vp.roundeven.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x double> @llvm.vp.roundeven.nvx4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x double> @llvm.vp.roundeven.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
-
-declare <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float>, <2 x i1>, i32)
-declare <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float>, <4 x i1>, i32)
-declare <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float>, <8 x i1>, i32)
-declare <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float>, <16 x i1>, i32)
-declare <vscale x 1 x float> @llvm.vp.roundtozero.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x float> @llvm.vp.roundtozero.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x float> @llvm.vp.roundtozero.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x float> @llvm.vp.roundtozero.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x float> @llvm.vp.roundtozero.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
-declare double @llvm.vp.roundtozero.f64(double)
-declare <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double>, <2 x i1>, i32)
-declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32)
-declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32)
-declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32)
-declare <vscale x 1 x double> @llvm.vp.roundtozero.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x double> @llvm.vp.roundtozero.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x double> @llvm.vp.roundtozero.nvx4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x double> @llvm.vp.roundtozero.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
-
-declare <2 x float> @llvm.vp.rint.v2f32(<2 x float>, <2 x i1>, i32)
-declare <4 x float> @llvm.vp.rint.v4f32(<4 x float>, <4 x i1>, i32)
-declare <8 x float> @llvm.vp.rint.v8f32(<8 x float>, <8 x i1>, i32)
-declare <16 x float> @llvm.vp.rint.v16f32(<16 x float>, <16 x i1>, i32)
-declare <vscale x 1 x float> @llvm.vp.rint.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x float> @llvm.vp.rint.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x float> @llvm.vp.rint.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x float> @llvm.vp.rint.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x float> @llvm.vp.rint.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
-declare double @llvm.vp.rint.f64(double)
-declare <2 x double> @llvm.vp.rint.v2f64(<2 x double>, <2 x i1>, i32)
-declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32)
-declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32)
-declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32)
-declare <vscale x 1 x double> @llvm.vp.rint.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x double> @llvm.vp.rint.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x double> @llvm.vp.rint.nvx4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x double> @llvm.vp.rint.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
-
-declare <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float>, <2 x i1>, i32)
-declare <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float>, <4 x i1>, i32)
-declare <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float>, <8 x i1>, i32)
-declare <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float>, <16 x i1>, i32)
-declare <vscale x 1 x float> @llvm.vp.nearbyint.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x float> @llvm.vp.nearbyint.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x float> @llvm.vp.nearbyint.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x float> @llvm.vp.nearbyint.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
-declare <vscale x 16 x float> @llvm.vp.nearbyint.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
-declare double @llvm.vp.nearbyint.f64(double)
-declare <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double>, <2 x i1>, i32)
-declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32)
-declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32)
-declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32)
-declare <vscale x 1 x double> @llvm.vp.nearbyint.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
-declare <vscale x 2 x double> @llvm.vp.nearbyint.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
-declare <vscale x 4 x double> @llvm.vp.nearbyint.nvx4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
-declare <vscale x 8 x double> @llvm.vp.nearbyint.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
+define void @vp_nearbyint_f16() {
+; ZVFH-LABEL: 'vp_nearbyint_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'vp_nearbyint_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+  call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+  call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+  call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x half> @llvm.vp.nearbyint.nvx1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x half> @llvm.vp.nearbyint.nvx2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x half> @llvm.vp.nearbyint.nvx4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x half> @llvm.vp.nearbyint.nvx8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x half> @llvm.vp.nearbyint.nvx16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
index e68fcf689c4ad2..2f35f72ca8c5c2 100644
--- a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,GENERIC,GENERICZVFH
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,GENERIC,GENERICZVFHMIN
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK,MAX256
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
@@ -15,12 +16,6 @@ define void @masked_gather_aligned() {
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
@@ -59,12 +54,6 @@ define void @masked_gather_aligned() {
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
@@ -103,12 +92,6 @@ define void @masked_gather_aligned() {
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
@@ -148,13 +131,6 @@ define void @masked_gather_aligned() {
   %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
   %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
 
-  %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-  %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-  %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-  %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-  %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-  %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
-
   %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
   %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
   %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
@@ -189,6 +165,53 @@ define void @masked_gather_aligned() {
   ret void
 }
 
+define void @masked_gather_aligned_f16() {
+; GENERICZVFH-LABEL: 'masked_gather_aligned_f16'
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; GENERICZVFHMIN-LABEL: 'masked_gather_aligned_f16'
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; MAX256-LABEL: 'masked_gather_aligned_f16'
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; UNSUPPORTED-LABEL: 'masked_gather_aligned_f16'
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+  %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+  %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+  %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+  %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+  %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+
+  ret void
+}
+
 define void @masked_gather_unaligned() {
 ; CHECK-LABEL: 'masked_gather_unaligned'
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V8F64.u = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
@@ -200,12 +223,6 @@ define void @masked_gather_unaligned() {
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4F32.u = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V2F32.u = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1F32.u = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V32F16.u = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V16F16.u = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V8F16.u = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4F16.u = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V2F16.u = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1F16.u = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V8I64.u = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4I64.u = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V2I64.u = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
@@ -238,13 +255,6 @@ define void @masked_gather_unaligned() {
   %V2F32.u = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
   %V1F32.u = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
 
-  %V32F16.u = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-  %V16F16.u = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-  %V8F16.u = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-  %V4F16.u = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-  %V2F16.u = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-  %V1F16.u = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
-
   %V8I64.u = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
   %V4I64.u = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
   %V2I64.u = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
@@ -271,51 +281,22 @@ define void @masked_gather_unaligned() {
   ret void
 }
 
-declare <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x double>)
-declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
-declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
-declare <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x double>)
-
-declare <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x float>)
-declare <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x float>)
-declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
-declare <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x float>)
-
-declare <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr>, i32, <vscale x 32 x i1>, <vscale x 32 x half>)
-declare <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x half>)
-declare <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
-declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
-declare <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x half>)
-
-declare <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
-declare <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
-declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
-
-declare <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i32>)
-declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
-declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
-declare <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)
-
-declare <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr>, i32, <vscale x 32 x i1>, <vscale x 32 x i16>)
-declare <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i16>)
-declare <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
-declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
-declare <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i16>)
-
-declare <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr>, i32, <vscale x 64 x i1>, <vscale x 64 x i8>)
-declare <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr>, i32, <vscale x 32 x i1>, <vscale x 32 x i8>)
-declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
-declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
-declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
-declare <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
+define void @masked_gather_unaligned_f16() {
+; CHECK-LABEL: 'masked_gather_unaligned_f16'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V32F16.u = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V16F16.u = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V8F16.u = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4F16.u = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V2F16.u = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1F16.u = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V32F16.u = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+  %V16F16.u = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+  %V8F16.u = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+  %V4F16.u = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+  %V2F16.u = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+  %V1F16.u = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
 
-declare <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x ptr>)
-declare <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x ptr>)
-declare <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x ptr>)
-declare <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x ptr>)
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
index f7c8437e27e7f7..8e917a42966618 100644
--- a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,GENERIC,GENERICZVFH
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,GENERIC,GENERICZVFHMIN
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK,MAX256
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
@@ -15,12 +16,6 @@ define void @masked_scatter_aligned() {
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
@@ -59,12 +54,6 @@ define void @masked_scatter_aligned() {
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
@@ -103,12 +92,6 @@ define void @masked_scatter_aligned() {
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
@@ -148,13 +131,6 @@ define void @masked_scatter_aligned() {
   call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
   call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
 
-  call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
-  call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
-  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
-  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
-  call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
-  call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
-
   call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
   call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
   call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
@@ -189,6 +165,53 @@ define void @masked_scatter_aligned() {
   ret void
 }
 
+define void @masked_scatter_aligned_f16() {
+; GENERICZVFH-LABEL: 'masked_scatter_aligned_f16'
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; GENERICZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; GENERICZVFHMIN-LABEL: 'masked_scatter_aligned_f16'
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; GENERICZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; MAX256-LABEL: 'masked_scatter_aligned_f16'
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; MAX256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; UNSUPPORTED-LABEL: 'masked_scatter_aligned_f16'
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+  call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+
+  ret void
+}
+
 define void @masked_scatter_unaligned() {
 ; CHECK-LABEL: 'masked_scatter_unaligned'
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
@@ -200,12 +223,6 @@ define void @masked_scatter_unaligned() {
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
@@ -238,13 +255,6 @@ define void @masked_scatter_unaligned() {
   call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
   call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
 
-  call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
-  call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
-  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
-  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
-  call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
-  call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
-
   call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
   call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
   call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
@@ -271,52 +281,22 @@ define void @masked_scatter_unaligned() {
   ret void
 }
 
-declare void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-
-declare void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-
-declare void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half>, <vscale x 32 x ptr>, i32, <vscale x 32 x i1>)
-declare void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-
-declare void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-
-declare void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-
-declare void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16>, <vscale x 32 x ptr>, i32, <vscale x 32 x i1>)
-declare void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-
-declare void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8>, <vscale x 64 x ptr>, i32, <vscale x 64 x i1>)
-declare void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8>, <vscale x 32 x ptr>, i32, <vscale x 32 x i1>)
-declare void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-
-declare void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
+define void @masked_scatter_unaligned_f16() {
+; CHECK-LABEL: 'masked_scatter_unaligned_f16'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+  call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
 
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
index 9b34320d4832be..acc340bab4cfdf 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s
 ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s --check-prefix=SIZE
 
 define void @broadcast_scalable() {
 ; CHECK-LABEL: 'broadcast_scalable'

diff  --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll
index 5fc7545af64e28..105cc8a1c2ba39 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-permute.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s
 ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s --check-prefix=SIZE
 ; Check that we don't crash querying costs when vectors are not enabled.
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32
 

diff  --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll
index e80dbe31683f3d..313217823df5ae 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-reverse.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-min=-1 | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin -riscv-v-vector-bits-min=-1 | FileCheck %s
 ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-min=-1 | FileCheck %s --check-prefixes=SIZE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin -riscv-v-vector-bits-min=-1 | FileCheck %s --check-prefixes=SIZE
 ; Check that we don't crash querying costs when vectors are not enabled.
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32
 

diff  --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll
index c3f20c858ba5e1..9e2017178132e6 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-transpose.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-min=-1 | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin -riscv-v-vector-bits-min=-1 | FileCheck %s
 ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-min=-1 | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin -riscv-v-vector-bits-min=-1 | FileCheck %s --check-prefix=SIZE
 ; Check that we don't crash querying costs when vectors are not enabled.
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32
 

diff  --git a/llvm/test/Analysis/CostModel/RISCV/splice.ll b/llvm/test/Analysis/CostModel/RISCV/splice.ll
index 9acccef9c4f68c..8d7d1576a532da 100644
--- a/llvm/test/Analysis/CostModel/RISCV/splice.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/splice.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s
 ; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefix=SIZE
+; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s --check-prefix=SIZE
 
 define void @vector_splice() {
 ; CHECK-LABEL: 'vector_splice'
@@ -165,59 +167,3 @@ define void @vector_splice() {
 
   ret void
 }
-
-declare <vscale x 1 x i8> @llvm.vector.splice.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, i32)
-declare <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, i32)
-declare <vscale x 4 x i8> @llvm.vector.splice.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, i32)
-declare <vscale x 8 x i8> @llvm.vector.splice.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, i32)
-declare <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
-declare <vscale x 32 x i8> @llvm.vector.splice.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, i32)
-declare <vscale x 64 x i8> @llvm.vector.splice.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, i32)
-
-declare <vscale x 1 x i16> @llvm.vector.splice.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, i32)
-declare <vscale x 2 x i16> @llvm.vector.splice.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, i32)
-declare <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, i32)
-declare <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
-declare <vscale x 16 x i16> @llvm.vector.splice.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, i32)
-declare <vscale x 32 x i16> @llvm.vector.splice.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, i32)
-declare <vscale x 64 x i16> @llvm.vector.splice.nxv64i16(<vscale x 64 x i16>, <vscale x 64 x i16>, i32)
-
-declare <vscale x 1 x i32> @llvm.vector.splice.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, i32)
-declare <vscale x 2 x i32> @llvm.vector.splice.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32)
-declare <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
-declare <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32)
-declare <vscale x 16 x i32> @llvm.vector.splice.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, i32)
-declare <vscale x 32 x i32> @llvm.vector.splice.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, i32)
-declare <vscale x 64 x i32> @llvm.vector.splice.nxv64i32(<vscale x 64 x i32>, <vscale x 64 x i32>, i32)
-
-declare <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i32)
-declare <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
-declare <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, i32)
-declare <vscale x 8 x i64> @llvm.vector.splice.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, i32)
-declare <vscale x 16 x i64> @llvm.vector.splice.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i64>, i32)
-declare <vscale x 32 x i64> @llvm.vector.splice.nxv32i64(<vscale x 32 x i64>, <vscale x 32 x i64>, i32)
-declare <vscale x 64 x i64> @llvm.vector.splice.nxv64i64(<vscale x 64 x i64>, <vscale x 64 x i64>, i32)
-
-declare <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, i32)
-declare <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, i32)
-declare <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, i32)
-declare <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
-declare <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, i32)
-declare <vscale x 32 x half> @llvm.vector.splice.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, i32)
-declare <vscale x 64 x half> @llvm.vector.splice.nxv64f16(<vscale x 64 x half>, <vscale x 64 x half>, i32)
-
-declare <vscale x 1 x float> @llvm.vector.splice.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, i32)
-declare <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)
-declare <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
-declare <vscale x 8 x float> @llvm.vector.splice.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, i32)
-declare <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
-declare <vscale x 32 x float> @llvm.vector.splice.nxv32f32(<vscale x 32 x float>, <vscale x 32 x float>, i32)
-declare <vscale x 64 x float> @llvm.vector.splice.nxv64f32(<vscale x 64 x float>, <vscale x 64 x float>, i32)
-
-declare <vscale x 1 x double> @llvm.vector.splice.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i32)
-declare <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
-declare <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, i32)
-declare <vscale x 8 x double> @llvm.vector.splice.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, i32)
-declare <vscale x 16 x double> @llvm.vector.splice.nxv16f64(<vscale x 16 x double>, <vscale x 16 x double>, i32)
-declare <vscale x 32 x double> @llvm.vector.splice.nxv32f64(<vscale x 32 x double>, <vscale x 32 x double>, i32)
-declare <vscale x 64 x double> @llvm.vector.splice.nxv64f64(<vscale x 64 x double>, <vscale x 64 x double>, i32)