[llvm] [RISCV][TTI] Add cost of typebased cast VPIntrinsics with functionalOPC. (PR #97797)

Sun Sep 1 19:53:24 PDT 2024

https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/97797

>From f1cc77ccd11f91ed6282bcf987be2eb7a271e4ca Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Fri, 7 Jun 2024 00:01:19 -0700
Subject: [PATCH 1/5] Precommit vp-cast intrinsic cost testcases

---
 .../CostModel/RISCV/rvv-intrinsics.ll         | 603 ++++++++++++++++++
 1 file changed, 603 insertions(+)

diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
index 40aad95e715afd..0d155d830326f0 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
@@ -19,6 +19,609 @@ define void @unsupported_fp_ops(<vscale x 4 x float> %vec, i32 %extraarg) {
   ret void
 }
 
+define void @int_truncate() { ; costs of trunc vs. llvm.vp.trunc (i32 -> i8) on scalar, fixed and scalable vectors
+; CHECK-LABEL: 'int_truncate'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'int_truncate'
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  trunc i32 undef to i8
+  trunc <1 x i32> undef to <1 x i8>
+  trunc <2 x i32> undef to <2 x i8>
+  trunc <4 x i32> undef to <4 x i8>
+  trunc <8 x i32> undef to <8 x i8>
+  trunc <16 x i32> undef to <16 x i8>
+  trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
+  trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+  trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+  trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+  call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+  call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @int_zext() { ; costs of zext vs. llvm.vp.zext (i32 -> i64) on scalar, fixed and scalable vectors
+; CHECK-LABEL: 'int_zext'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'int_zext'
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  zext i32 undef to i64
+  zext <1 x i32> undef to <1 x i64>
+  zext <2 x i32> undef to <2 x i64>
+  zext <4 x i32> undef to <4 x i64>
+  zext <8 x i32> undef to <8 x i64>
+  zext <16 x i32> undef to <16 x i64>
+  zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+  zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+  zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+  zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+  call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+  call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @int_sext() { ; costs of sext vs. llvm.vp.sext (i32 -> i64) on scalar, fixed and scalable vectors
+; CHECK-LABEL: 'int_sext'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'int_sext'
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  sext i32 undef to i64
+  sext <1 x i32> undef to <1 x i64>
+  sext <2 x i32> undef to <2 x i64>
+  sext <4 x i32> undef to <4 x i64>
+  sext <8 x i32> undef to <8 x i64>
+  sext <16 x i32> undef to <16 x i64>
+  sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+  sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+  sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+  sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+  call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+  call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @fp_truncate() { ; costs of fptrunc vs. llvm.vp.fptrunc (double -> float) on scalar, fixed and scalable vectors
+; CHECK-LABEL: 'fp_truncate'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'fp_truncate'
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  fptrunc double undef to float
+  fptrunc <1 x double> undef to <1 x float>
+  fptrunc <2 x double> undef to <2 x float>
+  fptrunc <4 x double> undef to <4 x float>
+  fptrunc <8 x double> undef to <8 x float>
+  fptrunc <16 x double> undef to <16 x float>
+  fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+  fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+  fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+  fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+  call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
+  call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+  call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+  call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+  call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @fpext() {
+; CHECK-LABEL: 'fpext'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'fpext'
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; Plain float->double fpext (scalar, fixed, scalable), then llvm.vp.fpext on the same vector types with undef mask and EVL.
+  fpext float undef to double
+  fpext <1 x float> undef to <1 x double>
+  fpext <2 x float> undef to <2 x double>
+  fpext <4 x float> undef to <4 x double>
+  fpext <8 x float> undef to <8 x double>
+  fpext <16 x float> undef to <16 x double>
+  fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+  fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+  fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+  fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+  call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+  call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @fptoui() {
+; CHECK-LABEL: 'fptoui'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'fptoui'
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; Plain float->i32 fptoui (scalar, fixed, scalable), then llvm.vp.fptoui on the same vector types with undef mask and EVL.
+  fptoui float undef to i32
+  fptoui <1 x float> undef to <1 x i32>
+  fptoui <2 x float> undef to <2 x i32>
+  fptoui <4 x float> undef to <4 x i32>
+  fptoui <8 x float> undef to <8 x i32>
+  fptoui <16 x float> undef to <16 x i32>
+  fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
+  fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+  fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+  fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+  call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+  call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @fptosi() {
+; CHECK-LABEL: 'fptosi'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'fptosi'
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; Plain float->i32 fptosi (scalar, fixed, scalable), then llvm.vp.fptosi on the same vector types with undef mask and EVL.
+  fptosi float undef to i32
+  fptosi <1 x float> undef to <1 x i32>
+  fptosi <2 x float> undef to <2 x i32>
+  fptosi <4 x float> undef to <4 x i32>
+  fptosi <8 x float> undef to <8 x i32>
+  fptosi <16 x float> undef to <16 x i32>
+  fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
+  fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+  fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+  fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+  call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+  call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @uitofp() {
+; CHECK-LABEL: 'uitofp'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'uitofp'
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; Plain i64->double uitofp (scalar, fixed, scalable), then llvm.vp.uitofp on the same vector types with undef mask and EVL.
+  uitofp i64 undef to double
+  uitofp <1 x i64> undef to <1 x double>
+  uitofp <2 x i64> undef to <2 x double>
+  uitofp <4 x i64> undef to <4 x double>
+  uitofp <8 x i64> undef to <8 x double>
+  uitofp <16 x i64> undef to <16 x double>
+  uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+  uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+  uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+  uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+  call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+  call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @sitofp() {
+; CHECK-LABEL: 'sitofp'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; TYPEBASED-LABEL: 'sitofp'
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  sitofp i64 undef to double
+  sitofp <1 x i64> undef to <1 x double>
+  sitofp <2 x i64> undef to <2 x double>
+  sitofp <4 x i64> undef to <4 x double>
+  sitofp <8 x i64> undef to <8 x double>
+  sitofp <16 x i64> undef to <16 x double>
+  sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+  sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+  sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+  sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+  call <1 x double> @llvm.vp.sitofp.v1i64.v1double(<1 x i64> undef, <1 x i1> undef, i32 undef)
+  call <2 x double> @llvm.vp.sitofp.v2i64.v2double(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.sitofp.v4i64.v4double(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.sitofp.v8i64.v8double(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.sitofp.v16.v16(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i64.nxv1double(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i64.nxv2double(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i64.nxv4double(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.sitofp.nxv8i64.nxv8double(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
 define void @powi(<vscale x 4 x float> %vec) {
 ; CHECK-LABEL: 'powi'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)

>From 9503976d9bedd7118f4360903c0ec85758135e6e Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Fri, 7 Jun 2024 00:15:02 -0700
Subject: [PATCH 2/5] [RISCV][TTI] Add cost of typebased cast VPIntrinsics with
 functionalOPC.

This patch makes the instruction cost of type-based cast VP intrinsics the same as that of
their non-VP counterparts.
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  18 ++
 .../CostModel/RISCV/rvv-intrinsics.ll         | 292 +++++++++---------
 2 files changed, 164 insertions(+), 146 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 5ec07b2a0aa8fd..c3d5648fc93200 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1015,6 +1015,24 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
       return getArithmeticInstrCost(*FOp, ICA.getReturnType(), CostKind);
     break;
   }
+  // vp int cast ops.
+  case Intrinsic::vp_trunc:
+  case Intrinsic::vp_zext:
+  case Intrinsic::vp_sext:
+  // vp float cast ops.
+  case Intrinsic::vp_fptoui:
+  case Intrinsic::vp_fptosi:
+  case Intrinsic::vp_uitofp:
+  case Intrinsic::vp_sitofp:
+  case Intrinsic::vp_fptrunc:
+  case Intrinsic::vp_fpext: {
+    std::optional<unsigned> FOp =
+        VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
+    if (FOp && !ICA.getArgTypes().empty())
+      return getCastInstrCost(*FOp, ICA.getReturnType(), ICA.getArgTypes()[0],
+                              TTI::CastContextHint::None, CostKind);
+    break;
+  }
   }
 
   if (ST->hasVInstructions() && RetTy->isVectorTy()) {
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
index 0d155d830326f0..3ac9359919d306 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
@@ -31,15 +31,15 @@ define void @int_truncate() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'int_truncate'
@@ -53,15 +53,15 @@ define void @int_truncate() {
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   trunc i32 undef to i8
@@ -99,14 +99,14 @@ define void @int_zext() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'int_zext'
@@ -121,14 +121,14 @@ define void @int_zext() {
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   zext i32 undef to i64
@@ -166,14 +166,14 @@ define void @int_sext() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'int_sext'
@@ -188,14 +188,14 @@ define void @int_sext() {
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   sext i32 undef to i64
@@ -233,14 +233,14 @@ define void @fp_truncate() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'fp_truncate'
@@ -255,14 +255,14 @@ define void @fp_truncate() {
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   fptrunc double undef to float
@@ -300,14 +300,14 @@ define void @fpext() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'fpext'
@@ -322,14 +322,14 @@ define void @fpext() {
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   fpext float undef to double
@@ -367,14 +367,14 @@ define void @fptoui() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'fptoui'
@@ -389,14 +389,14 @@ define void @fptoui() {
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   fptoui float undef to i32
@@ -434,14 +434,14 @@ define void @fptosi() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'fptosi'
@@ -456,14 +456,14 @@ define void @fptosi() {
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   fptosi float undef to i32
@@ -501,14 +501,14 @@ define void @uitofp() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'uitofp'
@@ -523,14 +523,14 @@ define void @uitofp() {
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   uitofp i64 undef to double
@@ -568,14 +568,14 @@ define void @sitofp() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPEBASED-LABEL: 'sitofp'
@@ -590,14 +590,14 @@ define void @sitofp() {
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 141 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   sitofp i64 undef to double

>From c30b2ce35159b3f1c39f5430f410a8729512f5e0 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 5 Aug 2024 19:26:15 -0700
Subject: [PATCH 3/5] Address comment and migrate testcases to vp-cast

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |    2 +-
 .../CostModel/RISCV/rvv-intrinsics.ll         |  603 ----------
 llvm/test/Analysis/CostModel/RISCV/vp-cast.ll | 1004 +++++++++++++++++
 3 files changed, 1005 insertions(+), 604 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/RISCV/vp-cast.ll

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index c3d5648fc93200..e0ebacc571af30 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1029,7 +1029,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     std::optional<unsigned> FOp =
         VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
     if (FOp && !ICA.getArgTypes().empty())
-      return getCastInstrCost(*FOp, ICA.getReturnType(), ICA.getArgTypes()[0],
+      return getCastInstrCost(*FOp, RetTy, ICA.getArgTypes()[0],
                               TTI::CastContextHint::None, CostKind);
     break;
   }
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
index 3ac9359919d306..40aad95e715afd 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll
@@ -19,609 +19,6 @@ define void @unsupported_fp_ops(<vscale x 4 x float> %vec, i32 %extraarg) {
   ret void
 }
 
-define void @int_truncate() {
-; CHECK-LABEL: 'int_truncate'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'int_truncate'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  trunc i32 undef to i8
-  trunc <1 x i32> undef to <1 x i8>
-  trunc <2 x i32> undef to <2 x i8>
-  trunc <4 x i32> undef to <4 x i8>
-  trunc <8 x i32> undef to <8 x i8>
-  trunc <16 x i32> undef to <16 x i8>
-  trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
-  trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
-  trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-  trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-  call <1 x i8> @llvm.vp.trunc.v1i32.v1i8(<1 x i32> undef, <1 x i1> undef, i32 undef)
-  call <2 x i8> @llvm.vp.trunc.v2i32.v2i8(<2 x i32> undef, <2 x i1> undef, i32 undef)
-  call <4 x i8> @llvm.vp.trunc.v4i32.v4i8(<4 x i32> undef, <4 x i1> undef, i32 undef)
-  call <8 x i8> @llvm.vp.trunc.v8i32.v8i8(<8 x i32> undef, <8 x i1> undef, i32 undef)
-  call <16 x i8> @llvm.vp.trunc.v16.v16(<16 x i32> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i32.nxv1i8(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i32.nxv2i8(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i32.nxv4i8(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i32.nxv8i8(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @int_zext() {
-; CHECK-LABEL: 'int_zext'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'int_zext'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  zext i32 undef to i64
-  zext <1 x i32> undef to <1 x i64>
-  zext <2 x i32> undef to <2 x i64>
-  zext <4 x i32> undef to <4 x i64>
-  zext <8 x i32> undef to <8 x i64>
-  zext <16 x i32> undef to <16 x i64>
-  zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-  zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-  zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-  zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-  call <1 x i64> @llvm.vp.zext.v1i32.v1i64(<1 x i32> undef, <1 x i1> undef, i32 undef)
-  call <2 x i64> @llvm.vp.zext.v2i32.v2i64(<2 x i32> undef, <2 x i1> undef, i32 undef)
-  call <4 x i64> @llvm.vp.zext.v4i32.v4i64(<4 x i32> undef, <4 x i1> undef, i32 undef)
-  call <8 x i64> @llvm.vp.zext.v8i32.v8i64(<8 x i32> undef, <8 x i1> undef, i32 undef)
-  call <16 x i64> @llvm.vp.zext.v16.v16(<16 x i32> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i64> @llvm.vp.zext.nxv1i32.nxv1i64(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i64> @llvm.vp.zext.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i64> @llvm.vp.zext.nxv4i32.nxv4i64(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @int_sext() {
-; CHECK-LABEL: 'int_sext'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'int_sext'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  sext i32 undef to i64
-  sext <1 x i32> undef to <1 x i64>
-  sext <2 x i32> undef to <2 x i64>
-  sext <4 x i32> undef to <4 x i64>
-  sext <8 x i32> undef to <8 x i64>
-  sext <16 x i32> undef to <16 x i64>
-  sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-  sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-  sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-  sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-  call <1 x i64> @llvm.vp.sext.v1i32.v1i64(<1 x i32> undef, <1 x i1> undef, i32 undef)
-  call <2 x i64> @llvm.vp.sext.v2i32.v2i64(<2 x i32> undef, <2 x i1> undef, i32 undef)
-  call <4 x i64> @llvm.vp.sext.v4i32.v4i64(<4 x i32> undef, <4 x i1> undef, i32 undef)
-  call <8 x i64> @llvm.vp.sext.v8i32.v8i64(<8 x i32> undef, <8 x i1> undef, i32 undef)
-  call <16 x i64> @llvm.vp.sext.v16.v16(<16 x i32> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i64> @llvm.vp.sext.nxv1i32.nxv1i64(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i64> @llvm.vp.sext.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i64> @llvm.vp.sext.nxv4i32.nxv4i64(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @fp_truncate() {
-; CHECK-LABEL: 'fp_truncate'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'fp_truncate'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  fptrunc double undef to float
-  fptrunc <1 x double> undef to <1 x float>
-  fptrunc <2 x double> undef to <2 x float>
-  fptrunc <4 x double> undef to <4 x float>
-  fptrunc <8 x double> undef to <8 x float>
-  fptrunc <16 x double> undef to <16 x float>
-  fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
-  fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-  fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-  fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
-  call <1 x float> @llvm.vp.fptrunc.v1double.v1float(<1 x double> undef, <1 x i1> undef, i32 undef)
-  call <2 x float> @llvm.vp.fptrunc.v2double.v2float(<2 x double> undef, <2 x i1> undef, i32 undef)
-  call <4 x float> @llvm.vp.fptrunc.v4double.v4float(<4 x double> undef, <4 x i1> undef, i32 undef)
-  call <8 x float> @llvm.vp.fptrunc.v8double.v8float(<8 x double> undef, <8 x i1> undef, i32 undef)
-  call <16 x float> @llvm.vp.fptrunc.v16.v16(<16 x double> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1double.nxv1float(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2double.nxv2float(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4double.nxv4float(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8double.nxv8float(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @fpext() {
-; CHECK-LABEL: 'fpext'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'fpext'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  fpext float undef to double
-  fpext <1 x float> undef to <1 x double>
-  fpext <2 x float> undef to <2 x double>
-  fpext <4 x float> undef to <4 x double>
-  fpext <8 x float> undef to <8 x double>
-  fpext <16 x float> undef to <16 x double>
-  fpext <vscale x 1 x float> undef to <vscale x 1 x double>
-  fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-  fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-  fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-  call <1 x double> @llvm.vp.fpext.v1float.v1double(<1 x float> undef, <1 x i1> undef, i32 undef)
-  call <2 x double> @llvm.vp.fpext.v2float.v2double(<2 x float> undef, <2 x i1> undef, i32 undef)
-  call <4 x double> @llvm.vp.fpext.v4float.v4double(<4 x float> undef, <4 x i1> undef, i32 undef)
-  call <8 x double> @llvm.vp.fpext.v8float.v8double(<8 x float> undef, <8 x i1> undef, i32 undef)
-  call <16 x double> @llvm.vp.fpext.v16.v16(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.fpext.nxv1float.nxv1double(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.fpext.nxv2float.nxv2double(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.fpext.nxv4float.nxv4double(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.fpext.nxv8float.nxv8double(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @fptoui() {
-; CHECK-LABEL: 'fptoui'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'fptoui'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  fptoui float undef to i32
-  fptoui <1 x float> undef to <1 x i32>
-  fptoui <2 x float> undef to <2 x i32>
-  fptoui <4 x float> undef to <4 x i32>
-  fptoui <8 x float> undef to <8 x i32>
-  fptoui <16 x float> undef to <16 x i32>
-  fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
-  fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
-  fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
-  fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
-  call <1 x i32> @llvm.vp.fptoui.v1float.v1i32(<1 x float> undef, <1 x i1> undef, i32 undef)
-  call <2 x i32> @llvm.vp.fptoui.v2float.v2i32(<2 x float> undef, <2 x i1> undef, i32 undef)
-  call <4 x i32> @llvm.vp.fptoui.v4float.v4i32(<4 x float> undef, <4 x i1> undef, i32 undef)
-  call <8 x i32> @llvm.vp.fptoui.v8float.v8i32(<8 x float> undef, <8 x i1> undef, i32 undef)
-  call <16 x i32> @llvm.vp.fptoui.v16.v16(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1float.nxv1i32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2float.nxv2i32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4float.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8float.nxv8i32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @fptosi() {
-; CHECK-LABEL: 'fptosi'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'fptosi'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  fptosi float undef to i32
-  fptosi <1 x float> undef to <1 x i32>
-  fptosi <2 x float> undef to <2 x i32>
-  fptosi <4 x float> undef to <4 x i32>
-  fptosi <8 x float> undef to <8 x i32>
-  fptosi <16 x float> undef to <16 x i32>
-  fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
-  fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
-  fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
-  fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
-  call <1 x i32> @llvm.vp.fptosi.v1float.v1i32(<1 x float> undef, <1 x i1> undef, i32 undef)
-  call <2 x i32> @llvm.vp.fptosi.v2float.v2i32(<2 x float> undef, <2 x i1> undef, i32 undef)
-  call <4 x i32> @llvm.vp.fptosi.v4float.v4i32(<4 x float> undef, <4 x i1> undef, i32 undef)
-  call <8 x i32> @llvm.vp.fptosi.v8float.v8i32(<8 x float> undef, <8 x i1> undef, i32 undef)
-  call <16 x i32> @llvm.vp.fptosi.v16.v16(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1float.nxv1i32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2float.nxv2i32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4float.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8float.nxv8i32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @uitofp() {
-; CHECK-LABEL: 'uitofp'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'uitofp'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  uitofp i64 undef to double
-  uitofp <1 x i64> undef to <1 x double>
-  uitofp <2 x i64> undef to <2 x double>
-  uitofp <4 x i64> undef to <4 x double>
-  uitofp <8 x i64> undef to <8 x double>
-  uitofp <16 x i64> undef to <16 x double>
-  uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-  uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-  uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-  uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-  call <1 x double> @llvm.vp.uitofp.v1i64.v1double(<1 x i64> undef, <1 x i1> undef, i32 undef)
-  call <2 x double> @llvm.vp.uitofp.v2i64.v2double(<2 x i64> undef, <2 x i1> undef, i32 undef)
-  call <4 x double> @llvm.vp.uitofp.v4i64.v4double(<4 x i64> undef, <4 x i1> undef, i32 undef)
-  call <8 x double> @llvm.vp.uitofp.v8i64.v8double(<8 x i64> undef, <8 x i1> undef, i32 undef)
-  call <16 x double> @llvm.vp.uitofp.v16.v16(<16 x i64> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.uitofp.nxv1i64.nxv1double(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.uitofp.nxv2i64.nxv2double(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.uitofp.nxv4i64.nxv4double(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.uitofp.nxv8i64.nxv8double(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @sitofp() {
-; CHECK-LABEL: 'sitofp'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; TYPEBASED-LABEL: 'sitofp'
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  sitofp i64 undef to double
-  sitofp <1 x i64> undef to <1 x double>
-  sitofp <2 x i64> undef to <2 x double>
-  sitofp <4 x i64> undef to <4 x double>
-  sitofp <8 x i64> undef to <8 x double>
-  sitofp <16 x i64> undef to <16 x double>
-  sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-  sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-  sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-  sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-  call <1 x double> @llvm.vp.sitofp.v1i64.v1double(<1 x i64> undef, <1 x i1> undef, i32 undef)
-  call <2 x double> @llvm.vp.sitofp.v2i64.v2double(<2 x i64> undef, <2 x i1> undef, i32 undef)
-  call <4 x double> @llvm.vp.sitofp.v4i64.v4double(<4 x i64> undef, <4 x i1> undef, i32 undef)
-  call <8 x double> @llvm.vp.sitofp.v8i64.v8double(<8 x i64> undef, <8 x i1> undef, i32 undef)
-  call <16 x double> @llvm.vp.sitofp.v16.v16(<16 x i64> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i64.nxv1double(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i64.nxv2double(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i64.nxv4double(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.sitofp.nxv8i64.nxv8double(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
 define void @powi(<vscale x 4 x float> %vec) {
 ; CHECK-LABEL: 'powi'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-cast.ll b/llvm/test/Analysis/CostModel/RISCV/vp-cast.ll
new file mode 100644
index 00000000000000..ecd60050d564fc
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-cast.ll
@@ -0,0 +1,1004 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv32 -mattr=+v | FileCheck %s --check-prefixes=RV32
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv32 -mattr=+v --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=RV32_TYPEBASED
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=RV64
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=RV64_TYPEBASED
+
+define void @int_truncate() { ; cost of trunc i32 -> i8: scalar, fixed and scalable vectors, then the matching llvm.vp.trunc calls
+; RV32-LABEL: 'int_truncate'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32_TYPEBASED-LABEL: 'int_truncate'
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'int_truncate'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64_TYPEBASED-LABEL: 'int_truncate'
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  trunc i32 undef to i8
+  trunc <1 x i32> undef to <1 x i8>
+  trunc <2 x i32> undef to <2 x i8>
+  trunc <4 x i32> undef to <4 x i8>
+  trunc <8 x i32> undef to <8 x i8>
+  trunc <16 x i32> undef to <16 x i8>
+  trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
+  trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
+  trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
+  trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+  call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+  call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @int_zext() {
+; RV32-LABEL: 'int_zext'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32_TYPEBASED-LABEL: 'int_zext'
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'int_zext'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64_TYPEBASED-LABEL: 'int_zext'
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  zext i32 undef to i64
+  zext <1 x i32> undef to <1 x i64>
+  zext <2 x i32> undef to <2 x i64>
+  zext <4 x i32> undef to <4 x i64>
+  zext <8 x i32> undef to <8 x i64>
+  zext <16 x i32> undef to <16 x i64>
+  zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+  zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+  zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+  zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+  call <1 x i64> @llvm.vp.zext.v1i32.v1i64(<1 x i32> undef, <1 x i1> undef, i32 undef)
+  call <2 x i64> @llvm.vp.zext.v2i32.v2i64(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  call <4 x i64> @llvm.vp.zext.v4i32.v4i64(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  call <8 x i64> @llvm.vp.zext.v8i32.v8i64(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  call <16 x i64> @llvm.vp.zext.v16.v16(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x i64> @llvm.vp.zext.nxv1i32.nxv1i64(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i64> @llvm.vp.zext.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i64> @llvm.vp.zext.nxv4i32.nxv4i64(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @int_sext() {
+; RV32-LABEL: 'int_sext'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32_TYPEBASED-LABEL: 'int_sext'
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'int_sext'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64_TYPEBASED-LABEL: 'int_sext'
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; Plain i32->i64 sext casts come first, then the matching llvm.vp.sext calls; the intrinsic suffix is <result type>.<source type>, exactly as printed in the assertions above.
+  sext i32 undef to i64
+  sext <1 x i32> undef to <1 x i64>
+  sext <2 x i32> undef to <2 x i64>
+  sext <4 x i32> undef to <4 x i64>
+  sext <8 x i32> undef to <8 x i64>
+  sext <16 x i32> undef to <16 x i64>
+  sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
+  sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
+  sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
+  sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
+  call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
+  call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @fp_truncate() {
+; RV32-LABEL: 'fp_truncate'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32_TYPEBASED-LABEL: 'fp_truncate'
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'fp_truncate'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64_TYPEBASED-LABEL: 'fp_truncate'
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  fptrunc double undef to float
+  fptrunc <1 x double> undef to <1 x float>
+  fptrunc <2 x double> undef to <2 x float>
+  fptrunc <4 x double> undef to <4 x float>
+  fptrunc <8 x double> undef to <8 x float>
+  fptrunc <16 x double> undef to <16 x float>
+  fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+  fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+  fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+  fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+  call <1 x float> @llvm.vp.fptrunc.v1double.v1float(<1 x double> undef, <1 x i1> undef, i32 undef)
+  call <2 x float> @llvm.vp.fptrunc.v2double.v2float(<2 x double> undef, <2 x i1> undef, i32 undef)
+  call <4 x float> @llvm.vp.fptrunc.v4double.v4float(<4 x double> undef, <4 x i1> undef, i32 undef)
+  call <8 x float> @llvm.vp.fptrunc.v8double.v8float(<8 x double> undef, <8 x i1> undef, i32 undef)
+  call <16 x float> @llvm.vp.fptrunc.v16.v16(<16 x double> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1double.nxv1float(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2double.nxv2float(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4double.nxv4float(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8double.nxv8float(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @fpext() { ; cost-model coverage: fpext float -> double, plain instruction and llvm.vp.fpext forms
+; RV32-LABEL: 'fpext'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32_TYPEBASED-LABEL: 'fpext'
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'fpext'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64_TYPEBASED-LABEL: 'fpext'
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  fpext float undef to double ; plain fpext: scalar first, then fixed-length and scalable vectors
+  fpext <1 x float> undef to <1 x double>
+  fpext <2 x float> undef to <2 x double>
+  fpext <4 x float> undef to <4 x double>
+  fpext <8 x float> undef to <8 x double>
+  fpext <16 x float> undef to <16 x double>
+  fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+  fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+  fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+  fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+  call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef) ; vp.fpext on the same vector types; name suffix is <result type>.<source type>, matching the CHECK lines
+  call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @fptoui() { ; cost-model coverage: fptoui float -> i32, plain instruction and llvm.vp.fptoui forms
+; RV32-LABEL: 'fptoui'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32_TYPEBASED-LABEL: 'fptoui'
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'fptoui'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64_TYPEBASED-LABEL: 'fptoui'
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  fptoui float undef to i32 ; plain fptoui: scalar first, then fixed-length and scalable vectors
+  fptoui <1 x float> undef to <1 x i32>
+  fptoui <2 x float> undef to <2 x i32>
+  fptoui <4 x float> undef to <4 x i32>
+  fptoui <8 x float> undef to <8 x i32>
+  fptoui <16 x float> undef to <16 x i32>
+  fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
+  fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
+  fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
+  fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
+  call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef) ; vp.fptoui on the same vector types; name suffix is <result type>.<source type>, matching the CHECK lines
+  call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @fptosi() {
+; RV32-LABEL: 'fptosi'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32_TYPEBASED-LABEL: 'fptosi'
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'fptosi'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64_TYPEBASED-LABEL: 'fptosi'
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; f32 -> i32 fptosi cases: scalar, fixed and scalable vectors, then the matching llvm.vp.fptosi calls (callee suffix is <ret>.<src>).
+  fptosi float undef to i32
+  fptosi <1 x float> undef to <1 x i32>
+  fptosi <2 x float> undef to <2 x i32>
+  fptosi <4 x float> undef to <4 x i32>
+  fptosi <8 x float> undef to <8 x i32>
+  fptosi <16 x float> undef to <16 x i32>
+  fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
+  fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
+  fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
+  fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
+  call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
+  call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @uitofp() {
+; RV32-LABEL: 'uitofp'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32_TYPEBASED-LABEL: 'uitofp'
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'uitofp'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64_TYPEBASED-LABEL: 'uitofp'
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; i64 -> f64 uitofp cases: scalar, fixed and scalable vectors, then the matching llvm.vp.uitofp calls (callee suffix is <ret>.<src>).
+  uitofp i64 undef to double
+  uitofp <1 x i64> undef to <1 x double>
+  uitofp <2 x i64> undef to <2 x double>
+  uitofp <4 x i64> undef to <4 x double>
+  uitofp <8 x i64> undef to <8 x double>
+  uitofp <16 x i64> undef to <16 x double>
+  uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+  uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+  uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+  uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+  call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+  call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}
+
+define void @sitofp() {
+; RV32-LABEL: 'sitofp'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32_TYPEBASED-LABEL: 'sitofp'
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'sitofp'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64_TYPEBASED-LABEL: 'sitofp'
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  sitofp i64 undef to double
+  sitofp <1 x i64> undef to <1 x double>
+  sitofp <2 x i64> undef to <2 x double>
+  sitofp <4 x i64> undef to <4 x double>
+  sitofp <8 x i64> undef to <8 x double>
+  sitofp <16 x i64> undef to <16 x double>
+  sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
+  sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
+  sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
+  sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
+  call <1 x double> @llvm.vp.sitofp.v1i64.v1double(<1 x i64> undef, <1 x i1> undef, i32 undef)
+  call <2 x double> @llvm.vp.sitofp.v2i64.v2double(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  call <4 x double> @llvm.vp.sitofp.v4i64.v4double(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  call <8 x double> @llvm.vp.sitofp.v8i64.v8double(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  call <16 x double> @llvm.vp.sitofp.v16.v16(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i64.nxv1double(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i64.nxv2double(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i64.nxv4double(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x double> @llvm.vp.sitofp.nxv8i64.nxv8double(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  ret void
+}

>From 68970e028a87f5d35f1cc76c2b84d304b9016bef Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 28 Aug 2024 22:37:00 -0700
Subject: [PATCH 4/5] Add assertions

---
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index e0ebacc571af30..9462f6e670bb0b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1011,8 +1011,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   case Intrinsic::vp_frem: {
     std::optional<unsigned> FOp =
         VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
-    if (FOp)
-      return getArithmeticInstrCost(*FOp, ICA.getReturnType(), CostKind);
+    assert(FOp.has_value());
+    return getArithmeticInstrCost(*FOp, ICA.getReturnType(), CostKind);
     break;
   }
   // vp int cast ops.
@@ -1028,9 +1028,9 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   case Intrinsic::vp_fpext: {
     std::optional<unsigned> FOp =
         VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
-    if (FOp && !ICA.getArgTypes().empty())
-      return getCastInstrCost(*FOp, RetTy, ICA.getArgTypes()[0],
-                              TTI::CastContextHint::None, CostKind);
+    assert(FOp.has_value() && !ICA.getArgTypes().empty());
+    return getCastInstrCost(*FOp, RetTy, ICA.getArgTypes()[0],
+                            TTI::CastContextHint::None, CostKind);
     break;
   }
   }

>From 345a89a069f12f263972a449ea14354699073877 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 1 Sep 2024 19:22:26 -0700
Subject: [PATCH 5/5] Merge vp.cast tests into cast.ll

---
 llvm/test/Analysis/CostModel/RISCV/cast.ll    | 4247 +++++++++++++++++
 llvm/test/Analysis/CostModel/RISCV/vp-cast.ll | 1004 ----
 2 files changed, 4247 insertions(+), 1004 deletions(-)
 delete mode 100644 llvm/test/Analysis/CostModel/RISCV/vp-cast.ll

diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll
index e90fab9fbc8c46..e98af65f1df3ce 100644
--- a/llvm/test/Analysis/CostModel/RISCV/cast.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput --type-based-intrinsic-cost=true 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput --type-based-intrinsic-cost=true 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK,RV64
 
 define void @sext() {
 ; RV32-LABEL: 'sext'
@@ -14,6 +16,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i16 = sext <2 x i1> undef to <2 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i32 = sext <2 x i1> undef to <2 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = sext <2 x i1> undef to <2 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i16 = call <2 x i16> @llvm.vp.sext.v2i16.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i32 = call <2 x i32> @llvm.vp.sext.v2i32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i64 = call <2 x i64> @llvm.vp.sext.v2i64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i32 = call <2 x i32> @llvm.vp.sext.v2i32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i64 = call <2 x i64> @llvm.vp.sext.v2i64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i32_v2i64 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i8 = call <2 x i8> @llvm.vp.sext.v2i8.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i16 = call <2 x i16> @llvm.vp.sext.v2i16.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i32 = call <2 x i32> @llvm.vp.sext.v2i32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i64 = call <2 x i64> @llvm.vp.sext.v2i64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = sext <4 x i8> undef to <4 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = sext <4 x i8> undef to <4 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64>
@@ -24,6 +36,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = sext <4 x i1> undef to <4 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = sext <4 x i1> undef to <4 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i8_v4i16 = call <4 x i16> @llvm.vp.sext.v4i16.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i8_v4i32 = call <4 x i32> @llvm.vp.sext.v4i32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i8_v4i64 = call <4 x i64> @llvm.vp.sext.v4i64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i16_v4i32 = call <4 x i32> @llvm.vp.sext.v4i32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i16_v4i64 = call <4 x i64> @llvm.vp.sext.v4i64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i32_v4i64 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i8 = call <4 x i8> @llvm.vp.sext.v4i8.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i16 = call <4 x i16> @llvm.vp.sext.v4i16.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i32 = call <4 x i32> @llvm.vp.sext.v4i32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4i1_v4i64 = call <4 x i64> @llvm.vp.sext.v4i64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = sext <8 x i8> undef to <8 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64>
@@ -34,6 +56,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = sext <8 x i1> undef to <8 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = sext <8 x i1> undef to <8 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8i8_v8i16 = call <8 x i16> @llvm.vp.sext.v8i16.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i8_v8i32 = call <8 x i32> @llvm.vp.sext.v8i32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i8_v8i64 = call <8 x i64> @llvm.vp.sext.v8i64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i16_v8i32 = call <8 x i32> @llvm.vp.sext.v8i32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i16_v8i64 = call <8 x i64> @llvm.vp.sext.v8i64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i32_v8i64 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i1_v8i8 = call <8 x i8> @llvm.vp.sext.v8i8.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i1_v8i16 = call <8 x i16> @llvm.vp.sext.v8i16.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i1_v8i32 = call <8 x i32> @llvm.vp.sext.v8i32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8i1_v8i64 = call <8 x i64> @llvm.vp.sext.v8i64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64>
@@ -44,6 +76,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = sext <16 x i1> undef to <16 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i8_v16i16 = call <16 x i16> @llvm.vp.sext.v16i16.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i8_v16i32 = call <16 x i32> @llvm.vp.sext.v16i32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i8_v16i64 = call <16 x i64> @llvm.vp.sext.v16i64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i16_v16i32 = call <16 x i32> @llvm.vp.sext.v16i32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i16_v16i64 = call <16 x i64> @llvm.vp.sext.v16i64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i32_v16i64 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i1_v16i8 = call <16 x i8> @llvm.vp.sext.v16i8.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i1_v16i16 = call <16 x i16> @llvm.vp.sext.v16i16.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i1_v16i32 = call <16 x i32> @llvm.vp.sext.v16i32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16i1_v16i64 = call <16 x i64> @llvm.vp.sext.v16i64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = sext <32 x i8> undef to <32 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64>
@@ -54,6 +96,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = sext <32 x i1> undef to <32 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = sext <32 x i1> undef to <32 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i8_v32i16 = call <32 x i16> @llvm.vp.sext.v32i16.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i8_v32i32 = call <32 x i32> @llvm.vp.sext.v32i32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i8_v32i64 = call <32 x i64> @llvm.vp.sext.v32i64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i16_v32i32 = call <32 x i32> @llvm.vp.sext.v32i32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i16_v32i64 = call <32 x i64> @llvm.vp.sext.v32i64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i32_v32i64 = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i1_v32i8 = call <32 x i8> @llvm.vp.sext.v32i8.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i1_v32i16 = call <32 x i16> @llvm.vp.sext.v32i16.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32i1_v32i32 = call <32 x i32> @llvm.vp.sext.v32i32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v32i1_v32i64 = call <32 x i64> @llvm.vp.sext.v32i64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64>
@@ -64,6 +116,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = sext <64 x i1> undef to <64 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v64i8_v64i16 = call <64 x i16> @llvm.vp.sext.v64i16.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64i8_v64i32 = call <64 x i32> @llvm.vp.sext.v64i32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v64i8_v64i64 = call <64 x i64> @llvm.vp.sext.v64i64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64i16_v64i32 = call <64 x i32> @llvm.vp.sext.v64i32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v64i16_v64i64 = call <64 x i64> @llvm.vp.sext.v64i64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64i32_v64i64 = call <64 x i64> @llvm.vp.sext.v64i64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v64i1_v64i8 = call <64 x i8> @llvm.vp.sext.v64i8.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v64i1_v64i16 = call <64 x i16> @llvm.vp.sext.v64i16.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v64i1_v64i32 = call <64 x i32> @llvm.vp.sext.v64i32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v64i1_v64i64 = call <64 x i64> @llvm.vp.sext.v64i64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64>
@@ -74,6 +136,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = sext <128 x i1> undef to <128 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v128i8_v128i16 = call <128 x i16> @llvm.vp.sext.v128i16.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v128i8_v128i32 = call <128 x i32> @llvm.vp.sext.v128i32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %vp_v128i8_v128i64 = call <128 x i64> @llvm.vp.sext.v128i64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128i16_v128i32 = call <128 x i32> @llvm.vp.sext.v128i32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_v128i16_v128i64 = call <128 x i64> @llvm.vp.sext.v128i64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128i32_v128i64 = call <128 x i64> @llvm.vp.sext.v128i64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v128i1_v128i8 = call <128 x i8> @llvm.vp.sext.v128i8.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v128i1_v128i16 = call <128 x i16> @llvm.vp.sext.v128i16.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v128i1_v128i32 = call <128 x i32> @llvm.vp.sext.v128i32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_v128i1_v128i64 = call <128 x i64> @llvm.vp.sext.v128i64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64>
@@ -84,6 +156,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = sext <256 x i1> undef to <256 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v256i8_v256i16 = call <256 x i16> @llvm.vp.sext.v256i16.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_v256i8_v256i32 = call <256 x i32> @llvm.vp.sext.v256i32.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %vp_v256i8_v256i64 = call <256 x i64> @llvm.vp.sext.v256i64.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v256i16_v256i32 = call <256 x i32> @llvm.vp.sext.v256i32.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %vp_v256i16_v256i64 = call <256 x i64> @llvm.vp.sext.v256i64.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %vp_v256i32_v256i64 = call <256 x i64> @llvm.vp.sext.v256i64.v256i32(<256 x i32> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v256i1_v256i8 = call <256 x i8> @llvm.vp.sext.v256i8.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vp_v256i1_v256i16 = call <256 x i16> @llvm.vp.sext.v256i16.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %vp_v256i1_v256i32 = call <256 x i32> @llvm.vp.sext.v256i32.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %vp_v256i1_v256i64 = call <256 x i64> @llvm.vp.sext.v256i64.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = sext <vscale x 1 x i8> undef to <vscale x 1 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = sext <vscale x 1 x i8> undef to <vscale x 1 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = sext <vscale x 1 x i8> undef to <vscale x 1 x i64>
@@ -94,6 +176,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i16 = sext <vscale x 1 x i1> undef to <vscale x 1 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i32 = sext <vscale x 1 x i1> undef to <vscale x 1 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = sext <vscale x 1 x i1> undef to <vscale x 1 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.sext.nxv1i16.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.sext.nxv1i8.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.sext.nxv1i16.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = sext <vscale x 2 x i8> undef to <vscale x 2 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = sext <vscale x 2 x i8> undef to <vscale x 2 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale x 2 x i64>
@@ -104,6 +196,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = sext <vscale x 2 x i1> undef to <vscale x 2 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = sext <vscale x 2 x i1> undef to <vscale x 2 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i8_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.sext.nxv2i16.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i8_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i8_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.sext.nxv2i8.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.sext.nxv2i16.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv2i1_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = sext <vscale x 4 x i8> undef to <vscale x 4 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64>
@@ -114,6 +216,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = sext <vscale x 4 x i1> undef to <vscale x 4 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4i8_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.sext.nxv4i16.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i8_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i8_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i1_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.sext.nxv4i8.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i1_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.sext.nxv4i16.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i1_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv4i1_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64>
@@ -124,6 +236,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = sext <vscale x 8 x i1> undef to <vscale x 8 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i8_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.sext.nxv8i16.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i8_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i8_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i1_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.sext.nxv8i8.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i1_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.sext.nxv8i16.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i1_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv8i1_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
@@ -134,6 +256,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = sext <vscale x 16 x i1> undef to <vscale x 16 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i8_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.sext.nxv16i16.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i8_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.sext.nxv16i32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i8_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.sext.nxv16i32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i1_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.sext.nxv16i8.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i1_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.sext.nxv16i16.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv16i1_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.sext.nxv16i32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv16i1_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64>
@@ -144,6 +276,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = sext <vscale x 32 x i1> undef to <vscale x 32 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv32i8_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.sext.nxv32i16.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32i8_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv32i8_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32i16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv32i16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32i32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv32i1_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.sext.nxv32i8.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv32i1_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.sext.nxv32i16.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv32i1_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv32i1_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i8_nxv64i64 = sext <vscale x 64 x i8> undef to <vscale x 64 x i64>
@@ -154,6 +296,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = sext <vscale x 64 x i1> undef to <vscale x 64 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i1_nxv64i64 = sext <vscale x 64 x i1> undef to <vscale x 64 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv64i8_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.sext.nxv64i16.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv64i8_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.sext.nxv64i32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64i8_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64i16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.sext.nxv64i32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %vp_nxv64i16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %vp_nxv64i32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv64i1_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.sext.nxv64i8.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv64i1_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.sext.nxv64i16.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv64i1_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.sext.nxv64i32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64i1_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = sext <vscale x 128 x i8> undef to <vscale x 128 x i128>
@@ -164,6 +316,16 @@ define void @sext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = sext <vscale x 128 x i1> undef to <vscale x 128 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = sext <vscale x 128 x i1> undef to <vscale x 128 x i128>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv128i8_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.sext.nxv128i16.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_nxv128i8_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.sext.nxv128i32.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv128i8_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i64.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv128i16_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.sext.nxv128i32.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 143 for instruction: %vp_nxv128i16_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i64.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 139 for instruction: %vp_nxv128i32_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i64.nxv128i32(<vscale x 128 x i32> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_nxv128i1_nxv128i8 = call <vscale x 128 x i8> @llvm.vp.sext.nxv128i8.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vp_nxv128i1_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.sext.nxv128i16.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %vp_nxv128i1_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.sext.nxv128i32.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv128i1_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i64.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64-LABEL: 'sext'
@@ -177,6 +339,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i16 = sext <2 x i1> undef to <2 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i32 = sext <2 x i1> undef to <2 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = sext <2 x i1> undef to <2 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i16 = call <2 x i16> @llvm.vp.sext.v2i16.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i32 = call <2 x i32> @llvm.vp.sext.v2i32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i64 = call <2 x i64> @llvm.vp.sext.v2i64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i32 = call <2 x i32> @llvm.vp.sext.v2i32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i64 = call <2 x i64> @llvm.vp.sext.v2i64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i32_v2i64 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i8 = call <2 x i8> @llvm.vp.sext.v2i8.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i16 = call <2 x i16> @llvm.vp.sext.v2i16.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i32 = call <2 x i32> @llvm.vp.sext.v2i32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i64 = call <2 x i64> @llvm.vp.sext.v2i64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = sext <4 x i8> undef to <4 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = sext <4 x i8> undef to <4 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64>
@@ -187,6 +359,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = sext <4 x i1> undef to <4 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = sext <4 x i1> undef to <4 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i8_v4i16 = call <4 x i16> @llvm.vp.sext.v4i16.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i8_v4i32 = call <4 x i32> @llvm.vp.sext.v4i32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i8_v4i64 = call <4 x i64> @llvm.vp.sext.v4i64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i16_v4i32 = call <4 x i32> @llvm.vp.sext.v4i32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i16_v4i64 = call <4 x i64> @llvm.vp.sext.v4i64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i32_v4i64 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i8 = call <4 x i8> @llvm.vp.sext.v4i8.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i16 = call <4 x i16> @llvm.vp.sext.v4i16.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i32 = call <4 x i32> @llvm.vp.sext.v4i32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4i1_v4i64 = call <4 x i64> @llvm.vp.sext.v4i64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = sext <8 x i8> undef to <8 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64>
@@ -197,6 +379,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = sext <8 x i1> undef to <8 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = sext <8 x i1> undef to <8 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8i8_v8i16 = call <8 x i16> @llvm.vp.sext.v8i16.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i8_v8i32 = call <8 x i32> @llvm.vp.sext.v8i32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i8_v8i64 = call <8 x i64> @llvm.vp.sext.v8i64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i16_v8i32 = call <8 x i32> @llvm.vp.sext.v8i32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i16_v8i64 = call <8 x i64> @llvm.vp.sext.v8i64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i32_v8i64 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i1_v8i8 = call <8 x i8> @llvm.vp.sext.v8i8.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i1_v8i16 = call <8 x i16> @llvm.vp.sext.v8i16.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i1_v8i32 = call <8 x i32> @llvm.vp.sext.v8i32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8i1_v8i64 = call <8 x i64> @llvm.vp.sext.v8i64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64>
@@ -207,6 +399,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = sext <16 x i1> undef to <16 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i8_v16i16 = call <16 x i16> @llvm.vp.sext.v16i16.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i8_v16i32 = call <16 x i32> @llvm.vp.sext.v16i32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i8_v16i64 = call <16 x i64> @llvm.vp.sext.v16i64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i16_v16i32 = call <16 x i32> @llvm.vp.sext.v16i32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i16_v16i64 = call <16 x i64> @llvm.vp.sext.v16i64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i32_v16i64 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i1_v16i8 = call <16 x i8> @llvm.vp.sext.v16i8.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i1_v16i16 = call <16 x i16> @llvm.vp.sext.v16i16.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i1_v16i32 = call <16 x i32> @llvm.vp.sext.v16i32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16i1_v16i64 = call <16 x i64> @llvm.vp.sext.v16i64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = sext <32 x i8> undef to <32 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64>
@@ -217,6 +419,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = sext <32 x i1> undef to <32 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = sext <32 x i1> undef to <32 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i8_v32i16 = call <32 x i16> @llvm.vp.sext.v32i16.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i8_v32i32 = call <32 x i32> @llvm.vp.sext.v32i32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i8_v32i64 = call <32 x i64> @llvm.vp.sext.v32i64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i16_v32i32 = call <32 x i32> @llvm.vp.sext.v32i32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i16_v32i64 = call <32 x i64> @llvm.vp.sext.v32i64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i32_v32i64 = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i1_v32i8 = call <32 x i8> @llvm.vp.sext.v32i8.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i1_v32i16 = call <32 x i16> @llvm.vp.sext.v32i16.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32i1_v32i32 = call <32 x i32> @llvm.vp.sext.v32i32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v32i1_v32i64 = call <32 x i64> @llvm.vp.sext.v32i64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64>
@@ -227,6 +439,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = sext <64 x i1> undef to <64 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v64i8_v64i16 = call <64 x i16> @llvm.vp.sext.v64i16.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64i8_v64i32 = call <64 x i32> @llvm.vp.sext.v64i32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v64i8_v64i64 = call <64 x i64> @llvm.vp.sext.v64i64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64i16_v64i32 = call <64 x i32> @llvm.vp.sext.v64i32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v64i16_v64i64 = call <64 x i64> @llvm.vp.sext.v64i64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64i32_v64i64 = call <64 x i64> @llvm.vp.sext.v64i64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v64i1_v64i8 = call <64 x i8> @llvm.vp.sext.v64i8.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v64i1_v64i16 = call <64 x i16> @llvm.vp.sext.v64i16.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v64i1_v64i32 = call <64 x i32> @llvm.vp.sext.v64i32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v64i1_v64i64 = call <64 x i64> @llvm.vp.sext.v64i64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64>
@@ -237,6 +459,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = sext <128 x i1> undef to <128 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v128i8_v128i16 = call <128 x i16> @llvm.vp.sext.v128i16.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v128i8_v128i32 = call <128 x i32> @llvm.vp.sext.v128i32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %vp_v128i8_v128i64 = call <128 x i64> @llvm.vp.sext.v128i64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128i16_v128i32 = call <128 x i32> @llvm.vp.sext.v128i32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_v128i16_v128i64 = call <128 x i64> @llvm.vp.sext.v128i64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128i32_v128i64 = call <128 x i64> @llvm.vp.sext.v128i64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v128i1_v128i8 = call <128 x i8> @llvm.vp.sext.v128i8.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v128i1_v128i16 = call <128 x i16> @llvm.vp.sext.v128i16.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v128i1_v128i32 = call <128 x i32> @llvm.vp.sext.v128i32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_v128i1_v128i64 = call <128 x i64> @llvm.vp.sext.v128i64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64>
@@ -247,6 +479,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = sext <256 x i1> undef to <256 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v256i8_v256i16 = call <256 x i16> @llvm.vp.sext.v256i16.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_v256i8_v256i32 = call <256 x i32> @llvm.vp.sext.v256i32.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %vp_v256i8_v256i64 = call <256 x i64> @llvm.vp.sext.v256i64.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v256i16_v256i32 = call <256 x i32> @llvm.vp.sext.v256i32.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %vp_v256i16_v256i64 = call <256 x i64> @llvm.vp.sext.v256i64.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %vp_v256i32_v256i64 = call <256 x i64> @llvm.vp.sext.v256i64.v256i32(<256 x i32> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v256i1_v256i8 = call <256 x i8> @llvm.vp.sext.v256i8.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vp_v256i1_v256i16 = call <256 x i16> @llvm.vp.sext.v256i16.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %vp_v256i1_v256i32 = call <256 x i32> @llvm.vp.sext.v256i32.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %vp_v256i1_v256i64 = call <256 x i64> @llvm.vp.sext.v256i64.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = sext <vscale x 1 x i8> undef to <vscale x 1 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = sext <vscale x 1 x i8> undef to <vscale x 1 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = sext <vscale x 1 x i8> undef to <vscale x 1 x i64>
@@ -257,6 +499,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i16 = sext <vscale x 1 x i1> undef to <vscale x 1 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i32 = sext <vscale x 1 x i1> undef to <vscale x 1 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = sext <vscale x 1 x i1> undef to <vscale x 1 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.sext.nxv1i16.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.sext.nxv1i8.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.sext.nxv1i16.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = sext <vscale x 2 x i8> undef to <vscale x 2 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = sext <vscale x 2 x i8> undef to <vscale x 2 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale x 2 x i64>
@@ -267,6 +519,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = sext <vscale x 2 x i1> undef to <vscale x 2 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = sext <vscale x 2 x i1> undef to <vscale x 2 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i8_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.sext.nxv2i16.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i8_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i8_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.sext.nxv2i8.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.sext.nxv2i16.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv2i1_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = sext <vscale x 4 x i8> undef to <vscale x 4 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64>
@@ -277,6 +539,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = sext <vscale x 4 x i1> undef to <vscale x 4 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4i8_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.sext.nxv4i16.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i8_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i8_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i1_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.sext.nxv4i8.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i1_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.sext.nxv4i16.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i1_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv4i1_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64>
@@ -287,6 +559,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = sext <vscale x 8 x i1> undef to <vscale x 8 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i8_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.sext.nxv8i16.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i8_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i8_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i1_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.sext.nxv8i8.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i1_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.sext.nxv8i16.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i1_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv8i1_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
@@ -297,6 +579,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = sext <vscale x 16 x i1> undef to <vscale x 16 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i8_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.sext.nxv16i16.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i8_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.sext.nxv16i32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i8_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.sext.nxv16i32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i1_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.sext.nxv16i8.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i1_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.sext.nxv16i16.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv16i1_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.sext.nxv16i32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv16i1_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64>
@@ -307,6 +599,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = sext <vscale x 32 x i1> undef to <vscale x 32 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv32i8_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.sext.nxv32i16.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32i8_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv32i8_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32i16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv32i16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32i32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv32i1_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.sext.nxv32i8.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv32i1_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.sext.nxv32i16.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv32i1_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv32i1_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %nxv64i8_nxv64i64 = sext <vscale x 64 x i8> undef to <vscale x 64 x i64>
@@ -317,6 +619,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = sext <vscale x 64 x i1> undef to <vscale x 64 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %nxv64i1_nxv64i64 = sext <vscale x 64 x i1> undef to <vscale x 64 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv64i8_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.sext.nxv64i16.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv64i8_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.sext.nxv64i32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %vp_nxv64i8_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64i16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.sext.nxv64i32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_nxv64i16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64i32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv64i1_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.sext.nxv64i8.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv64i1_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.sext.nxv64i16.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv64i1_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.sext.nxv64i32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_nxv64i1_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32>
 ; RV64-NEXT:  Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = sext <vscale x 128 x i8> undef to <vscale x 128 x i128>
@@ -327,6 +639,16 @@ define void @sext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = sext <vscale x 128 x i1> undef to <vscale x 128 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32>
 ; RV64-NEXT:  Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = sext <vscale x 128 x i1> undef to <vscale x 128 x i128>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv128i8_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.sext.nxv128i16.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_nxv128i8_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.sext.nxv128i32.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %vp_nxv128i8_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i64.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv128i16_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.sext.nxv128i32.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %vp_nxv128i16_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i64.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %vp_nxv128i32_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i64.nxv128i32(<vscale x 128 x i32> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_nxv128i1_nxv128i8 = call <vscale x 128 x i8> @llvm.vp.sext.nxv128i8.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vp_nxv128i1_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.sext.nxv128i16.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %vp_nxv128i1_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.sext.nxv128i32.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %vp_nxv128i1_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i64.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2i8_v2i16 = sext <2 x i8> undef to <2 x i16>
@@ -340,6 +662,17 @@ define void @sext() {
   %v2i1_v2i32 = sext <2 x i1> undef to <2 x i32>
   %v2i1_v2i64 = sext <2 x i1> undef to <2 x i64>
 
+  %vp_v2i8_v2i16 = call <2 x i16> @llvm.vp.sext.v2i8.v2i16(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i8_v2i32 = call <2 x i32> @llvm.vp.sext.v2i8.v2i32(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i8_v2i64 = call <2 x i64> @llvm.vp.sext.v2i8.v2i64(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i16_v2i32 = call <2 x i32> @llvm.vp.sext.v2i16.v2i32(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i16_v2i64 = call <2 x i64> @llvm.vp.sext.v2i16.v2i64(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i32_v2i64 = call <2 x i64> @llvm.vp.sext.v2i32.v2i64(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i1_v2i8 = call <2 x i8> @llvm.vp.sext.v2i1.v2i8(<2 x i1> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i1_v2i16 = call <2 x i16> @llvm.vp.sext.v2i1.v2i16(<2 x i1> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i1_v2i32 = call <2 x i32> @llvm.vp.sext.v2i1.v2i32(<2 x i1> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i1_v2i64 = call <2 x i64> @llvm.vp.sext.v2i1.v2i64(<2 x i1> undef, <2 x i1> undef, i32 undef)
+
   %v4i8_v4i16 = sext <4 x i8> undef to <4 x i16>
   %v4i8_v4i32 = sext <4 x i8> undef to <4 x i32>
   %v4i8_v4i64 = sext <4 x i8> undef to <4 x i64>
@@ -351,6 +684,17 @@ define void @sext() {
   %v4i1_v4i32 = sext <4 x i1> undef to <4 x i32>
   %v4i1_v4i64 = sext <4 x i1> undef to <4 x i64>
 
+  %vp_v4i8_v4i16 = call <4 x i16> @llvm.vp.sext.v4i8.v4i16(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i8_v4i32 = call <4 x i32> @llvm.vp.sext.v4i8.v4i32(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i8_v4i64 = call <4 x i64> @llvm.vp.sext.v4i8.v4i64(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i16_v4i32 = call <4 x i32> @llvm.vp.sext.v4i16.v4i32(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i16_v4i64 = call <4 x i64> @llvm.vp.sext.v4i16.v4i64(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i32_v4i64 = call <4 x i64> @llvm.vp.sext.v4i32.v4i64(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i1_v4i8 = call <4 x i8> @llvm.vp.sext.v4i1.v4i8(<4 x i1> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i1_v4i16 = call <4 x i16> @llvm.vp.sext.v4i1.v4i16(<4 x i1> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i1_v4i32 = call <4 x i32> @llvm.vp.sext.v4i1.v4i32(<4 x i1> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i1_v4i64 = call <4 x i64> @llvm.vp.sext.v4i1.v4i64(<4 x i1> undef, <4 x i1> undef, i32 undef)
+
   %v8i8_v8i16 = sext <8 x i8> undef to <8 x i16>
   %v8i8_v8i32 = sext <8 x i8> undef to <8 x i32>
   %v8i8_v8i64 = sext <8 x i8> undef to <8 x i64>
@@ -362,6 +706,17 @@ define void @sext() {
   %v8i1_v8i32 = sext <8 x i1> undef to <8 x i32>
   %v8i1_v8i64 = sext <8 x i1> undef to <8 x i64>
 
+  %vp_v8i8_v8i16 = call <8 x i16> @llvm.vp.sext.v8i8.v8i16(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i8_v8i32 = call <8 x i32> @llvm.vp.sext.v8i8.v8i32(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i8_v8i64 = call <8 x i64> @llvm.vp.sext.v8i8.v8i64(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i16_v8i32 = call <8 x i32> @llvm.vp.sext.v8i16.v8i32(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i16_v8i64 = call <8 x i64> @llvm.vp.sext.v8i16.v8i64(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i32_v8i64 = call <8 x i64> @llvm.vp.sext.v8i32.v8i64(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i1_v8i8 = call <8 x i8> @llvm.vp.sext.v8i1.v8i8(<8 x i1> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i1_v8i16 = call <8 x i16> @llvm.vp.sext.v8i1.v8i16(<8 x i1> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i1_v8i32 = call <8 x i32> @llvm.vp.sext.v8i1.v8i32(<8 x i1> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i1_v8i64 = call <8 x i64> @llvm.vp.sext.v8i1.v8i64(<8 x i1> undef, <8 x i1> undef, i32 undef)
+
   %v16i8_v16i16 = sext <16 x i8> undef to <16 x i16>
   %v16i8_v16i32 = sext <16 x i8> undef to <16 x i32>
   %v16i8_v16i64 = sext <16 x i8> undef to <16 x i64>
@@ -373,6 +728,17 @@ define void @sext() {
   %v16i1_v16i32 = sext <16 x i1> undef to <16 x i32>
   %v16i1_v16i64 = sext <16 x i1> undef to <16 x i64>
 
+  %vp_v16i8_v16i16 = call <16 x i16> @llvm.vp.sext.v16i8.v16i16(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i8_v16i32 = call <16 x i32> @llvm.vp.sext.v16i8.v16i32(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i8_v16i64 = call <16 x i64> @llvm.vp.sext.v16i8.v16i64(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i16_v16i32 = call <16 x i32> @llvm.vp.sext.v16i16.v16i32(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i16_v16i64 = call <16 x i64> @llvm.vp.sext.v16i16.v16i64(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i32_v16i64 = call <16 x i64> @llvm.vp.sext.v16i32.v16i64(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i1_v16i8 = call <16 x i8> @llvm.vp.sext.v16i1.v16i8(<16 x i1> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i1_v16i16 = call <16 x i16> @llvm.vp.sext.v16i1.v16i16(<16 x i1> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i1_v16i32 = call <16 x i32> @llvm.vp.sext.v16i1.v16i32(<16 x i1> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i1_v16i64 = call <16 x i64> @llvm.vp.sext.v16i1.v16i64(<16 x i1> undef, <16 x i1> undef, i32 undef)
+
   %v32i8_v32i16 = sext <32 x i8> undef to <32 x i16>
   %v32i8_v32i32 = sext <32 x i8> undef to <32 x i32>
   %v32i8_v32i64 = sext <32 x i8> undef to <32 x i64>
@@ -384,6 +750,17 @@ define void @sext() {
   %v32i1_v32i32 = sext <32 x i1> undef to <32 x i32>
   %v32i1_v32i64 = sext <32 x i1> undef to <32 x i64>
 
+  %vp_v32i8_v32i16 = call <32 x i16> @llvm.vp.sext.v32i8.v32i16(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i8_v32i32 = call <32 x i32> @llvm.vp.sext.v32i8.v32i32(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i8_v32i64 = call <32 x i64> @llvm.vp.sext.v32i8.v32i64(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i16_v32i32 = call <32 x i32> @llvm.vp.sext.v32i16.v32i32(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i16_v32i64 = call <32 x i64> @llvm.vp.sext.v32i16.v32i64(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i32_v32i64 = call <32 x i64> @llvm.vp.sext.v32i32.v32i64(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i1_v32i8 = call <32 x i8> @llvm.vp.sext.v32i1.v32i8(<32 x i1> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i1_v32i16 = call <32 x i16> @llvm.vp.sext.v32i1.v32i16(<32 x i1> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i1_v32i32 = call <32 x i32> @llvm.vp.sext.v32i1.v32i32(<32 x i1> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i1_v32i64 = call <32 x i64> @llvm.vp.sext.v32i1.v32i64(<32 x i1> undef, <32 x i1> undef, i32 undef)
+
   %v64i8_v64i16 = sext <64 x i8> undef to <64 x i16>
   %v64i8_v64i32 = sext <64 x i8> undef to <64 x i32>
   %v64i8_v64i64 = sext <64 x i8> undef to <64 x i64>
@@ -395,6 +772,17 @@ define void @sext() {
   %v64i1_v64i32 = sext <64 x i1> undef to <64 x i32>
   %v64i1_v64i64 = sext <64 x i1> undef to <64 x i64>
 
+  %vp_v64i8_v64i16 = call <64 x i16> @llvm.vp.sext.v64i8.v64i16(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i8_v64i32 = call <64 x i32> @llvm.vp.sext.v64i8.v64i32(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i8_v64i64 = call <64 x i64> @llvm.vp.sext.v64i8.v64i64(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i16_v64i32 = call <64 x i32> @llvm.vp.sext.v64i16.v64i32(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i16_v64i64 = call <64 x i64> @llvm.vp.sext.v64i16.v64i64(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i32_v64i64 = call <64 x i64> @llvm.vp.sext.v64i32.v64i64(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i1_v64i8 = call <64 x i8> @llvm.vp.sext.v64i1.v64i8(<64 x i1> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i1_v64i16 = call <64 x i16> @llvm.vp.sext.v64i1.v64i16(<64 x i1> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i1_v64i32 = call <64 x i32> @llvm.vp.sext.v64i1.v64i32(<64 x i1> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i1_v64i64 = call <64 x i64> @llvm.vp.sext.v64i1.v64i64(<64 x i1> undef, <64 x i1> undef, i32 undef)
+
   %v128i8_v128i16 = sext <128 x i8> undef to <128 x i16>
   %v128i8_v128i32 = sext <128 x i8> undef to <128 x i32>
   %v128i8_v128i64 = sext <128 x i8> undef to <128 x i64>
@@ -406,6 +794,17 @@ define void @sext() {
   %v128i1_v128i32 = sext <128 x i1> undef to <128 x i32>
   %v128i1_v128i64 = sext <128 x i1> undef to <128 x i64>
 
+  %vp_v128i8_v128i16 = call <128 x i16> @llvm.vp.sext.v128i8.v128i16(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i8_v128i32 = call <128 x i32> @llvm.vp.sext.v128i8.v128i32(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i8_v128i64 = call <128 x i64> @llvm.vp.sext.v128i8.v128i64(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i16_v128i32 = call <128 x i32> @llvm.vp.sext.v128i16.v128i32(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i16_v128i64 = call <128 x i64> @llvm.vp.sext.v128i16.v128i64(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i32_v128i64 = call <128 x i64> @llvm.vp.sext.v128i32.v128i64(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i1_v128i8 = call <128 x i8> @llvm.vp.sext.v128i1.v128i8(<128 x i1> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i1_v128i16 = call <128 x i16> @llvm.vp.sext.v128i1.v128i16(<128 x i1> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i1_v128i32 = call <128 x i32> @llvm.vp.sext.v128i1.v128i32(<128 x i1> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i1_v128i64 = call <128 x i64> @llvm.vp.sext.v128i1.v128i64(<128 x i1> undef, <128 x i1> undef, i32 undef)
+
   %v256i8_v256i16 = sext <256 x i8> undef to <256 x i16>
   %v256i8_v256i32 = sext <256 x i8> undef to <256 x i32>
   %v256i8_v256i64 = sext <256 x i8> undef to <256 x i64>
@@ -417,6 +816,17 @@ define void @sext() {
   %v256i1_v256i32 = sext <256 x i1> undef to <256 x i32>
   %v256i1_v256i64 = sext <256 x i1> undef to <256 x i64>
 
+  %vp_v256i8_v256i16 = call <256 x i16> @llvm.vp.sext.v256i8.v256i16(<256 x i8> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i8_v256i32 = call <256 x i32> @llvm.vp.sext.v256i8.v256i32(<256 x i8> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i8_v256i64 = call <256 x i64> @llvm.vp.sext.v256i8.v256i64(<256 x i8> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i16_v256i32 = call <256 x i32> @llvm.vp.sext.v256i16.v256i32(<256 x i16> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i16_v256i64 = call <256 x i64> @llvm.vp.sext.v256i16.v256i64(<256 x i16> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i32_v256i64 = call <256 x i64> @llvm.vp.sext.v256i32.v256i64(<256 x i32> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i1_v256i8 = call <256 x i8> @llvm.vp.sext.v256i1.v256i8(<256 x i1> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i1_v256i16 = call <256 x i16> @llvm.vp.sext.v256i1.v256i16(<256 x i1> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i1_v256i32 = call <256 x i32> @llvm.vp.sext.v256i1.v256i32(<256 x i1> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i1_v256i64 = call <256 x i64> @llvm.vp.sext.v256i1.v256i64(<256 x i1> undef, <256 x i1> undef, i32 undef)
+
   %nxv1i8_nxv1i16 = sext <vscale x 1 x i8> undef to <vscale x 1 x i16>
   %nxv1i8_nxv1i32 = sext <vscale x 1 x i8> undef to <vscale x 1 x i32>
   %nxv1i8_nxv1i64 = sext <vscale x 1 x i8> undef to <vscale x 1 x i64>
@@ -428,6 +838,17 @@ define void @sext() {
   %nxv1i1_nxv1i32 = sext <vscale x 1 x i1> undef to <vscale x 1 x i32>
   %nxv1i1_nxv1i64 = sext <vscale x 1 x i1> undef to <vscale x 1 x i64>
 
+  %vp_nxv1i8_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.sext.nxv1i8.nxv1i16(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i8_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i8.nxv1i32(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i8_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i8.nxv1i64(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i16.nxv1i32(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i16.nxv1i64(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i32.nxv1i64(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i1_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.sext.nxv1i1.nxv1i8(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i1_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.sext.nxv1i1.nxv1i16(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i1_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i1.nxv1i32(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i1_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i1.nxv1i64(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+
   %nxv2i8_nxv2i16 = sext <vscale x 2 x i8> undef to <vscale x 2 x i16>
   %nxv2i8_nxv2i32 = sext <vscale x 2 x i8> undef to <vscale x 2 x i32>
   %nxv2i8_nxv2i64 = sext <vscale x 2 x i8> undef to <vscale x 2 x i64>
@@ -439,6 +860,17 @@ define void @sext() {
   %nxv2i1_nxv2i32 = sext <vscale x 2 x i1> undef to <vscale x 2 x i32>
   %nxv2i1_nxv2i64 = sext <vscale x 2 x i1> undef to <vscale x 2 x i64>
 
+  %vp_nxv2i8_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.sext.nxv2i8.nxv2i16(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i8_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i8.nxv2i32(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i8_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i8.nxv2i64(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i16.nxv2i32(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i16.nxv2i64(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i1_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.sext.nxv2i1.nxv2i8(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i1_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.sext.nxv2i1.nxv2i16(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i1_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i1.nxv2i32(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i1_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i1.nxv2i64(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+
   %nxv4i8_nxv4i16 = sext <vscale x 4 x i8> undef to <vscale x 4 x i16>
   %nxv4i8_nxv4i32 = sext <vscale x 4 x i8> undef to <vscale x 4 x i32>
   %nxv4i8_nxv4i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64>
@@ -450,6 +882,17 @@ define void @sext() {
   %nxv4i1_nxv4i32 = sext <vscale x 4 x i1> undef to <vscale x 4 x i32>
   %nxv4i1_nxv4i64 = sext <vscale x 4 x i1> undef to <vscale x 4 x i64>
 
+  %vp_nxv4i8_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.sext.nxv4i8.nxv4i16(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i8_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i8.nxv4i32(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i8_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i8.nxv4i64(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i16.nxv4i32(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i16.nxv4i64(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i32.nxv4i64(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i1_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.sext.nxv4i1.nxv4i8(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i1_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.sext.nxv4i1.nxv4i16(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i1_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i1.nxv4i32(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i1_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i1.nxv4i64(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+
   %nxv8i8_nxv8i16 = sext <vscale x 8 x i8> undef to <vscale x 8 x i16>
   %nxv8i8_nxv8i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32>
   %nxv8i8_nxv8i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64>
@@ -461,6 +904,17 @@ define void @sext() {
   %nxv8i1_nxv8i32 = sext <vscale x 8 x i1> undef to <vscale x 8 x i32>
   %nxv8i1_nxv8i64 = sext <vscale x 8 x i1> undef to <vscale x 8 x i64>
 
+  %vp_nxv8i8_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.sext.nxv8i8.nxv8i16(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i8_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i8.nxv8i32(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i8_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i8.nxv8i64(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i16.nxv8i64(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i1_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.sext.nxv8i1.nxv8i8(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i1_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.sext.nxv8i1.nxv8i16(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i1_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i1.nxv8i32(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i1_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i1.nxv8i64(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+
   %nxv16i8_nxv16i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
   %nxv16i8_nxv16i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
   %nxv16i8_nxv16i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
@@ -472,6 +926,17 @@ define void @sext() {
   %nxv16i1_nxv16i32 = sext <vscale x 16 x i1> undef to <vscale x 16 x i32>
   %nxv16i1_nxv16i64 = sext <vscale x 16 x i1> undef to <vscale x 16 x i64>
 
+  %vp_nxv16i8_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.sext.nxv16i8.nxv16i16(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i8_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.sext.nxv16i8.nxv16i32(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i8_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i8.nxv16i64(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.sext.nxv16i16.nxv16i32(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i16.nxv16i64(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i32.nxv16i64(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i1_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.sext.nxv16i1.nxv16i8(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i1_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.sext.nxv16i1.nxv16i16(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i1_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.sext.nxv16i1.nxv16i32(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i1_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.sext.nxv16i1.nxv16i64(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+
   %nxv32i8_nxv32i16 = sext <vscale x 32 x i8> undef to <vscale x 32 x i16>
   %nxv32i8_nxv32i32 = sext <vscale x 32 x i8> undef to <vscale x 32 x i32>
   %nxv32i8_nxv32i64 = sext <vscale x 32 x i8> undef to <vscale x 32 x i64>
@@ -483,6 +948,17 @@ define void @sext() {
   %nxv32i1_nxv32i32 = sext <vscale x 32 x i1> undef to <vscale x 32 x i32>
   %nxv32i1_nxv32i64 = sext <vscale x 32 x i1> undef to <vscale x 32 x i64>
 
+  %vp_nxv32i8_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.sext.nxv32i8.nxv32i16(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i8_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i8.nxv32i32(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i8_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i8.nxv32i64(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i16.nxv32i32(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i16.nxv32i64(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i32.nxv32i64(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i1_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.sext.nxv32i1.nxv32i8(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i1_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.sext.nxv32i1.nxv32i16(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i1_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i1.nxv32i32(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i1_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.sext.nxv32i1.nxv32i64(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+
   %nxv64i8_nxv64i16 = sext <vscale x 64 x i8> undef to <vscale x 64 x i16>
   %nxv64i8_nxv64i32 = sext <vscale x 64 x i8> undef to <vscale x 64 x i32>
   %nxv64i8_nxv64i64 = sext <vscale x 64 x i8> undef to <vscale x 64 x i64>
@@ -494,6 +970,17 @@ define void @sext() {
   %nxv64i1_nxv64i32 = sext <vscale x 64 x i1> undef to <vscale x 64 x i32>
   %nxv64i1_nxv64i64 = sext <vscale x 64 x i1> undef to <vscale x 64 x i64>
 
+  %vp_nxv64i8_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.sext.nxv64i8.nxv64i16(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i8_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.sext.nxv64i8.nxv64i32(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i8_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i8.nxv64i64(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.sext.nxv64i16.nxv64i32(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i16.nxv64i64(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i32.nxv64i64(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i1_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.sext.nxv64i1.nxv64i8(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i1_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.sext.nxv64i1.nxv64i16(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i1_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.sext.nxv64i1.nxv64i32(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i1_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.sext.nxv64i1.nxv64i64(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+
   %nxv128i8_nxv128i16 = sext <vscale x 128 x i8> undef to <vscale x 128 x i16>
   %nxv128i8_nxv128i32 = sext <vscale x 128 x i8> undef to <vscale x 128 x i32>
   %nxv128i8_nxv128i128 = sext <vscale x 128 x i8> undef to <vscale x 128 x i128>
@@ -505,6 +992,17 @@ define void @sext() {
   %nxv128i1_nxv128i32 = sext <vscale x 128 x i1> undef to <vscale x 128 x i32>
   %nxv128i1_nxv128i128 = sext <vscale x 128 x i1> undef to <vscale x 128 x i128>
 
+  %vp_nxv128i8_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.sext.nxv128i8.nxv128i16(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i8_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.sext.nxv128i8.nxv128i32(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i8_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i8.nxv128i64(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i16_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.sext.nxv128i16.nxv128i32(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i16_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i16.nxv128i64(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i32_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i32.nxv128i64(<vscale x 128 x i32> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i1_nxv128i8 = call <vscale x 128 x i8> @llvm.vp.sext.nxv128i1.nxv128i8(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i1_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.sext.nxv128i1.nxv128i16(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i1_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.sext.nxv128i1.nxv128i32(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i1_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.sext.nxv128i1.nxv128i64(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+
   ret void
 }
 
@@ -520,6 +1018,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i16 = zext <2 x i1> undef to <2 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i32 = zext <2 x i1> undef to <2 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = zext <2 x i1> undef to <2 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i16 = call <2 x i16> @llvm.vp.zext.v2i16.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i32 = call <2 x i32> @llvm.vp.zext.v2i32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i64 = call <2 x i64> @llvm.vp.zext.v2i64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i32 = call <2 x i32> @llvm.vp.zext.v2i32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i64 = call <2 x i64> @llvm.vp.zext.v2i64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i32_v2i64 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i8 = call <2 x i8> @llvm.vp.zext.v2i8.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i16 = call <2 x i16> @llvm.vp.zext.v2i16.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i32 = call <2 x i32> @llvm.vp.zext.v2i32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i64 = call <2 x i64> @llvm.vp.zext.v2i64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = zext <4 x i8> undef to <4 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = zext <4 x i8> undef to <4 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64>
@@ -530,6 +1038,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = zext <4 x i1> undef to <4 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = zext <4 x i1> undef to <4 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i8_v4i16 = call <4 x i16> @llvm.vp.zext.v4i16.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i8_v4i32 = call <4 x i32> @llvm.vp.zext.v4i32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i8_v4i64 = call <4 x i64> @llvm.vp.zext.v4i64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i16_v4i32 = call <4 x i32> @llvm.vp.zext.v4i32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i16_v4i64 = call <4 x i64> @llvm.vp.zext.v4i64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i32_v4i64 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i8 = call <4 x i8> @llvm.vp.zext.v4i8.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i16 = call <4 x i16> @llvm.vp.zext.v4i16.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i32 = call <4 x i32> @llvm.vp.zext.v4i32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4i1_v4i64 = call <4 x i64> @llvm.vp.zext.v4i64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = zext <8 x i8> undef to <8 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64>
@@ -540,6 +1058,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = zext <8 x i1> undef to <8 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8i8_v8i16 = call <8 x i16> @llvm.vp.zext.v8i16.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i8_v8i32 = call <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i8_v8i64 = call <8 x i64> @llvm.vp.zext.v8i64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i16_v8i32 = call <8 x i32> @llvm.vp.zext.v8i32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i16_v8i64 = call <8 x i64> @llvm.vp.zext.v8i64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i32_v8i64 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i1_v8i8 = call <8 x i8> @llvm.vp.zext.v8i8.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i1_v8i16 = call <8 x i16> @llvm.vp.zext.v8i16.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i1_v8i32 = call <8 x i32> @llvm.vp.zext.v8i32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8i1_v8i64 = call <8 x i64> @llvm.vp.zext.v8i64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64>
@@ -550,6 +1078,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = zext <16 x i1> undef to <16 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i8_v16i16 = call <16 x i16> @llvm.vp.zext.v16i16.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i8_v16i32 = call <16 x i32> @llvm.vp.zext.v16i32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i8_v16i64 = call <16 x i64> @llvm.vp.zext.v16i64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i16_v16i32 = call <16 x i32> @llvm.vp.zext.v16i32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i16_v16i64 = call <16 x i64> @llvm.vp.zext.v16i64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i32_v16i64 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i1_v16i8 = call <16 x i8> @llvm.vp.zext.v16i8.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i1_v16i16 = call <16 x i16> @llvm.vp.zext.v16i16.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i1_v16i32 = call <16 x i32> @llvm.vp.zext.v16i32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16i1_v16i64 = call <16 x i64> @llvm.vp.zext.v16i64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64>
@@ -560,6 +1098,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = zext <32 x i1> undef to <32 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i8_v32i16 = call <32 x i16> @llvm.vp.zext.v32i16.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i8_v32i32 = call <32 x i32> @llvm.vp.zext.v32i32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i8_v32i64 = call <32 x i64> @llvm.vp.zext.v32i64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i16_v32i32 = call <32 x i32> @llvm.vp.zext.v32i32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i16_v32i64 = call <32 x i64> @llvm.vp.zext.v32i64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i32_v32i64 = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i1_v32i8 = call <32 x i8> @llvm.vp.zext.v32i8.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i1_v32i16 = call <32 x i16> @llvm.vp.zext.v32i16.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32i1_v32i32 = call <32 x i32> @llvm.vp.zext.v32i32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v32i1_v32i64 = call <32 x i64> @llvm.vp.zext.v32i64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64>
@@ -570,6 +1118,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = zext <64 x i1> undef to <64 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = zext <64 x i1> undef to <64 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v64i8_v64i16 = call <64 x i16> @llvm.vp.zext.v64i16.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64i8_v64i32 = call <64 x i32> @llvm.vp.zext.v64i32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v64i8_v64i64 = call <64 x i64> @llvm.vp.zext.v64i64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64i16_v64i32 = call <64 x i32> @llvm.vp.zext.v64i32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v64i16_v64i64 = call <64 x i64> @llvm.vp.zext.v64i64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64i32_v64i64 = call <64 x i64> @llvm.vp.zext.v64i64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v64i1_v64i8 = call <64 x i8> @llvm.vp.zext.v64i8.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v64i1_v64i16 = call <64 x i16> @llvm.vp.zext.v64i16.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v64i1_v64i32 = call <64 x i32> @llvm.vp.zext.v64i32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v64i1_v64i64 = call <64 x i64> @llvm.vp.zext.v64i64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64>
@@ -580,6 +1138,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = zext <128 x i1> undef to <128 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v128i8_v128i16 = call <128 x i16> @llvm.vp.zext.v128i16.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v128i8_v128i32 = call <128 x i32> @llvm.vp.zext.v128i32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %vp_v128i8_v128i64 = call <128 x i64> @llvm.vp.zext.v128i64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128i16_v128i32 = call <128 x i32> @llvm.vp.zext.v128i32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_v128i16_v128i64 = call <128 x i64> @llvm.vp.zext.v128i64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128i32_v128i64 = call <128 x i64> @llvm.vp.zext.v128i64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v128i1_v128i8 = call <128 x i8> @llvm.vp.zext.v128i8.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v128i1_v128i16 = call <128 x i16> @llvm.vp.zext.v128i16.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v128i1_v128i32 = call <128 x i32> @llvm.vp.zext.v128i32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_v128i1_v128i64 = call <128 x i64> @llvm.vp.zext.v128i64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = zext <256 x i8> undef to <256 x i64>
@@ -590,6 +1158,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = zext <256 x i1> undef to <256 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = zext <256 x i1> undef to <256 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v256i8_v256i16 = call <256 x i16> @llvm.vp.zext.v256i16.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_v256i8_v256i32 = call <256 x i32> @llvm.vp.zext.v256i32.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %vp_v256i8_v256i64 = call <256 x i64> @llvm.vp.zext.v256i64.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v256i16_v256i32 = call <256 x i32> @llvm.vp.zext.v256i32.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %vp_v256i16_v256i64 = call <256 x i64> @llvm.vp.zext.v256i64.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %vp_v256i32_v256i64 = call <256 x i64> @llvm.vp.zext.v256i64.v256i32(<256 x i32> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v256i1_v256i8 = call <256 x i8> @llvm.vp.zext.v256i8.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vp_v256i1_v256i16 = call <256 x i16> @llvm.vp.zext.v256i16.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %vp_v256i1_v256i32 = call <256 x i32> @llvm.vp.zext.v256i32.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %vp_v256i1_v256i64 = call <256 x i64> @llvm.vp.zext.v256i64.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = zext <vscale x 1 x i8> undef to <vscale x 1 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = zext <vscale x 1 x i8> undef to <vscale x 1 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = zext <vscale x 1 x i8> undef to <vscale x 1 x i64>
@@ -600,6 +1178,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i16 = zext <vscale x 1 x i1> undef to <vscale x 1 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i32 = zext <vscale x 1 x i1> undef to <vscale x 1 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = zext <vscale x 1 x i1> undef to <vscale x 1 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.zext.nxv1i8.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = zext <vscale x 2 x i8> undef to <vscale x 2 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = zext <vscale x 2 x i8> undef to <vscale x 2 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64>
@@ -610,6 +1198,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = zext <vscale x 2 x i1> undef to <vscale x 2 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = zext <vscale x 2 x i1> undef to <vscale x 2 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i8_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i8_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i8_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.zext.nxv2i8.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv2i1_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = zext <vscale x 4 x i8> undef to <vscale x 4 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64>
@@ -620,6 +1218,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = zext <vscale x 4 x i1> undef to <vscale x 4 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4i8_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.zext.nxv4i16.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i8_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.zext.nxv4i32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i8_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.zext.nxv4i32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i1_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.zext.nxv4i8.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i1_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.zext.nxv4i16.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i1_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.zext.nxv4i32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv4i1_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = zext <vscale x 8 x i8> undef to <vscale x 8 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64>
@@ -630,6 +1238,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = zext <vscale x 8 x i1> undef to <vscale x 8 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = zext <vscale x 8 x i1> undef to <vscale x 8 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i8_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.zext.nxv8i16.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i8_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i8_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i1_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.zext.nxv8i8.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i1_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.zext.nxv8i16.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i1_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv8i1_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
@@ -640,6 +1258,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = zext <vscale x 16 x i1> undef to <vscale x 16 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i8_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.zext.nxv16i16.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i8_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.zext.nxv16i32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i8_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.zext.nxv16i32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i1_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.zext.nxv16i8.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i1_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.zext.nxv16i16.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv16i1_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.zext.nxv16i32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv16i1_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64>
@@ -650,6 +1278,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = zext <vscale x 32 x i1> undef to <vscale x 32 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = zext <vscale x 32 x i1> undef to <vscale x 32 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv32i8_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.zext.nxv32i16.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32i8_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv32i8_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32i16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv32i16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32i32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv32i1_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.zext.nxv32i8.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv32i1_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.zext.nxv32i16.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv32i1_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv32i1_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to <vscale x 64 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i8_nxv64i64 = zext <vscale x 64 x i8> undef to <vscale x 64 x i64>
@@ -660,6 +1298,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = zext <vscale x 64 x i1> undef to <vscale x 64 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = zext <vscale x 64 x i1> undef to <vscale x 64 x i32>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i1_nxv64i64 = zext <vscale x 64 x i1> undef to <vscale x 64 x i64>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv64i8_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.zext.nxv64i16.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv64i8_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.zext.nxv64i32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64i8_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64i16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.zext.nxv64i32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %vp_nxv64i16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %vp_nxv64i32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv64i1_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.zext.nxv64i8.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv64i1_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.zext.nxv64i16.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv64i1_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.zext.nxv64i32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64i1_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = zext <vscale x 128 x i8> undef to <vscale x 128 x i128>
@@ -670,6 +1318,16 @@ define void @zext() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = zext <vscale x 128 x i1> undef to <vscale x 128 x i16>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = zext <vscale x 128 x i1> undef to <vscale x 128 x i128>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv128i8_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.zext.nxv128i16.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_nxv128i8_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.zext.nxv128i32.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv128i8_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i64.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv128i16_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.zext.nxv128i32.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 143 for instruction: %vp_nxv128i16_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i64.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 139 for instruction: %vp_nxv128i32_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i64.nxv128i32(<vscale x 128 x i32> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_nxv128i1_nxv128i8 = call <vscale x 128 x i8> @llvm.vp.zext.nxv128i8.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vp_nxv128i1_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.zext.nxv128i16.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %vp_nxv128i1_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.zext.nxv128i32.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv128i1_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i64.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64-LABEL: 'zext'
@@ -683,6 +1341,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i16 = zext <2 x i1> undef to <2 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i32 = zext <2 x i1> undef to <2 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i1_v2i64 = zext <2 x i1> undef to <2 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i16 = call <2 x i16> @llvm.vp.zext.v2i16.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i32 = call <2 x i32> @llvm.vp.zext.v2i32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i8_v2i64 = call <2 x i64> @llvm.vp.zext.v2i64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i32 = call <2 x i32> @llvm.vp.zext.v2i32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i64 = call <2 x i64> @llvm.vp.zext.v2i64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i32_v2i64 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i8 = call <2 x i8> @llvm.vp.zext.v2i8.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i16 = call <2 x i16> @llvm.vp.zext.v2i16.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i32 = call <2 x i32> @llvm.vp.zext.v2i32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i1_v2i64 = call <2 x i64> @llvm.vp.zext.v2i64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i16 = zext <4 x i8> undef to <4 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4i32 = zext <4 x i8> undef to <4 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64>
@@ -693,6 +1361,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i16 = zext <4 x i1> undef to <4 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i1_v4i32 = zext <4 x i1> undef to <4 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i8_v4i16 = call <4 x i16> @llvm.vp.zext.v4i16.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i8_v4i32 = call <4 x i32> @llvm.vp.zext.v4i32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i8_v4i64 = call <4 x i64> @llvm.vp.zext.v4i64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i16_v4i32 = call <4 x i32> @llvm.vp.zext.v4i32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i16_v4i64 = call <4 x i64> @llvm.vp.zext.v4i64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i32_v4i64 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i8 = call <4 x i8> @llvm.vp.zext.v4i8.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i16 = call <4 x i16> @llvm.vp.zext.v4i16.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i1_v4i32 = call <4 x i32> @llvm.vp.zext.v4i32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4i1_v4i64 = call <4 x i64> @llvm.vp.zext.v4i64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8i16 = zext <8 x i8> undef to <8 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64>
@@ -703,6 +1381,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i1_v8i16 = zext <8 x i1> undef to <8 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8i8_v8i16 = call <8 x i16> @llvm.vp.zext.v8i16.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i8_v8i32 = call <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i8_v8i64 = call <8 x i64> @llvm.vp.zext.v8i64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i16_v8i32 = call <8 x i32> @llvm.vp.zext.v8i32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i16_v8i64 = call <8 x i64> @llvm.vp.zext.v8i64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i32_v8i64 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i1_v8i8 = call <8 x i8> @llvm.vp.zext.v8i8.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i1_v8i16 = call <8 x i16> @llvm.vp.zext.v8i16.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i1_v8i32 = call <8 x i32> @llvm.vp.zext.v8i32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8i1_v8i64 = call <8 x i64> @llvm.vp.zext.v8i64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64>
@@ -713,6 +1401,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16i1_v16i16 = zext <16 x i1> undef to <16 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i8_v16i16 = call <16 x i16> @llvm.vp.zext.v16i16.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i8_v16i32 = call <16 x i32> @llvm.vp.zext.v16i32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i8_v16i64 = call <16 x i64> @llvm.vp.zext.v16i64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i16_v16i32 = call <16 x i32> @llvm.vp.zext.v16i32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i16_v16i64 = call <16 x i64> @llvm.vp.zext.v16i64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i32_v16i64 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i1_v16i8 = call <16 x i8> @llvm.vp.zext.v16i8.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i1_v16i16 = call <16 x i16> @llvm.vp.zext.v16i16.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i1_v16i32 = call <16 x i32> @llvm.vp.zext.v16i32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16i1_v16i64 = call <16 x i64> @llvm.vp.zext.v16i64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64>
@@ -723,6 +1421,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i1_v32i16 = zext <32 x i1> undef to <32 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i8_v32i16 = call <32 x i16> @llvm.vp.zext.v32i16.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i8_v32i32 = call <32 x i32> @llvm.vp.zext.v32i32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i8_v32i64 = call <32 x i64> @llvm.vp.zext.v32i64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i16_v32i32 = call <32 x i32> @llvm.vp.zext.v32i32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i16_v32i64 = call <32 x i64> @llvm.vp.zext.v32i64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32i32_v32i64 = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i1_v32i8 = call <32 x i8> @llvm.vp.zext.v32i8.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i1_v32i16 = call <32 x i16> @llvm.vp.zext.v32i16.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32i1_v32i32 = call <32 x i32> @llvm.vp.zext.v32i32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v32i1_v32i64 = call <32 x i64> @llvm.vp.zext.v32i64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64>
@@ -733,6 +1441,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i1_v64i16 = zext <64 x i1> undef to <64 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v64i1_v64i64 = zext <64 x i1> undef to <64 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v64i8_v64i16 = call <64 x i16> @llvm.vp.zext.v64i16.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64i8_v64i32 = call <64 x i32> @llvm.vp.zext.v64i32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v64i8_v64i64 = call <64 x i64> @llvm.vp.zext.v64i64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64i16_v64i32 = call <64 x i32> @llvm.vp.zext.v64i32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v64i16_v64i64 = call <64 x i64> @llvm.vp.zext.v64i64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64i32_v64i64 = call <64 x i64> @llvm.vp.zext.v64i64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v64i1_v64i8 = call <64 x i8> @llvm.vp.zext.v64i8.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v64i1_v64i16 = call <64 x i16> @llvm.vp.zext.v64i16.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v64i1_v64i32 = call <64 x i32> @llvm.vp.zext.v64i32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v64i1_v64i64 = call <64 x i64> @llvm.vp.zext.v64i64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64>
@@ -743,6 +1461,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i1_v128i16 = zext <128 x i1> undef to <128 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v128i8_v128i16 = call <128 x i16> @llvm.vp.zext.v128i16.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_v128i8_v128i32 = call <128 x i32> @llvm.vp.zext.v128i32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %vp_v128i8_v128i64 = call <128 x i64> @llvm.vp.zext.v128i64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128i16_v128i32 = call <128 x i32> @llvm.vp.zext.v128i32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_v128i16_v128i64 = call <128 x i64> @llvm.vp.zext.v128i64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128i32_v128i64 = call <128 x i64> @llvm.vp.zext.v128i64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v128i1_v128i8 = call <128 x i8> @llvm.vp.zext.v128i8.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v128i1_v128i16 = call <128 x i16> @llvm.vp.zext.v128i16.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v128i1_v128i32 = call <128 x i32> @llvm.vp.zext.v128i32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_v128i1_v128i64 = call <128 x i64> @llvm.vp.zext.v128i64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %v256i8_v256i64 = zext <256 x i8> undef to <256 x i64>
@@ -753,6 +1481,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v256i1_v256i16 = zext <256 x i1> undef to <256 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %v256i1_v256i64 = zext <256 x i1> undef to <256 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v256i8_v256i16 = call <256 x i16> @llvm.vp.zext.v256i16.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_v256i8_v256i32 = call <256 x i32> @llvm.vp.zext.v256i32.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %vp_v256i8_v256i64 = call <256 x i64> @llvm.vp.zext.v256i64.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v256i16_v256i32 = call <256 x i32> @llvm.vp.zext.v256i32.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %vp_v256i16_v256i64 = call <256 x i64> @llvm.vp.zext.v256i64.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %vp_v256i32_v256i64 = call <256 x i64> @llvm.vp.zext.v256i64.v256i32(<256 x i32> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v256i1_v256i8 = call <256 x i8> @llvm.vp.zext.v256i8.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vp_v256i1_v256i16 = call <256 x i16> @llvm.vp.zext.v256i16.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %vp_v256i1_v256i32 = call <256 x i32> @llvm.vp.zext.v256i32.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %vp_v256i1_v256i64 = call <256 x i64> @llvm.vp.zext.v256i64.v256i1(<256 x i1> undef, <256 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i16 = zext <vscale x 1 x i8> undef to <vscale x 1 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i32 = zext <vscale x 1 x i8> undef to <vscale x 1 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1i64 = zext <vscale x 1 x i8> undef to <vscale x 1 x i64>
@@ -763,6 +1501,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i16 = zext <vscale x 1 x i1> undef to <vscale x 1 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i32 = zext <vscale x 1 x i1> undef to <vscale x 1 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i1_nxv1i64 = zext <vscale x 1 x i1> undef to <vscale x 1 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i8_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.zext.nxv1i8.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i1_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i16 = zext <vscale x 2 x i8> undef to <vscale x 2 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2i32 = zext <vscale x 2 x i8> undef to <vscale x 2 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64>
@@ -773,6 +1521,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i16 = zext <vscale x 2 x i1> undef to <vscale x 2 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i1_nxv2i32 = zext <vscale x 2 x i1> undef to <vscale x 2 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i8_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i8_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i8_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.zext.nxv2i8.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i16.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i1_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv2i1_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4i16 = zext <vscale x 4 x i8> undef to <vscale x 4 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64>
@@ -783,6 +1541,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i1_nxv4i16 = zext <vscale x 4 x i1> undef to <vscale x 4 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4i8_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.zext.nxv4i16.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i8_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.zext.nxv4i32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i8_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.zext.nxv4i32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i1_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.zext.nxv4i8.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i1_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.zext.nxv4i16.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i1_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.zext.nxv4i32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv4i1_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i16 = zext <vscale x 8 x i8> undef to <vscale x 8 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i8_nxv8i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64>
@@ -793,6 +1561,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i1_nxv8i16 = zext <vscale x 8 x i1> undef to <vscale x 8 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i1_nxv8i32 = zext <vscale x 8 x i1> undef to <vscale x 8 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i8_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.zext.nxv8i16.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i8_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i8_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i1_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.zext.nxv8i8.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i1_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.zext.nxv8i16.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i1_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv8i1_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
@@ -803,6 +1581,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_nxv16i16 = zext <vscale x 16 x i1> undef to <vscale x 16 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i8_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.zext.nxv16i16.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i8_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.zext.nxv16i32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i8_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.zext.nxv16i32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16i32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i1_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.zext.nxv16i8.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i1_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.zext.nxv16i16.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv16i1_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.zext.nxv16i32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv16i1_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64>
@@ -813,6 +1601,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv32i1_nxv32i16 = zext <vscale x 32 x i1> undef to <vscale x 32 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i1_nxv32i32 = zext <vscale x 32 x i1> undef to <vscale x 32 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv32i8_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.zext.nxv32i16.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32i8_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv32i8_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32i16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv32i16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32i32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv32i1_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.zext.nxv32i8.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv32i1_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.zext.nxv32i16.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv32i1_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv32i1_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to <vscale x 64 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %nxv64i8_nxv64i64 = zext <vscale x 64 x i8> undef to <vscale x 64 x i64>
@@ -823,6 +1621,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i1_nxv64i16 = zext <vscale x 64 x i1> undef to <vscale x 64 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i1_nxv64i32 = zext <vscale x 64 x i1> undef to <vscale x 64 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %nxv64i1_nxv64i64 = zext <vscale x 64 x i1> undef to <vscale x 64 x i64>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv64i8_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.zext.nxv64i16.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %vp_nxv64i8_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.zext.nxv64i32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %vp_nxv64i8_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64i16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.zext.nxv64i32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_nxv64i16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64i32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv64i1_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.zext.nxv64i8.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv64i1_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.zext.nxv64i16.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv64i1_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.zext.nxv64i32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_nxv64i1_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32>
 ; RV64-NEXT:  Cost Model: Invalid cost for instruction: %nxv128i8_nxv128i128 = zext <vscale x 128 x i8> undef to <vscale x 128 x i128>
@@ -833,6 +1641,16 @@ define void @zext() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %nxv128i1_nxv128i16 = zext <vscale x 128 x i1> undef to <vscale x 128 x i16>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32>
 ; RV64-NEXT:  Cost Model: Invalid cost for instruction: %nxv128i1_nxv128i128 = zext <vscale x 128 x i1> undef to <vscale x 128 x i128>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv128i8_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.zext.nxv128i16.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %vp_nxv128i8_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.zext.nxv128i32.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 142 for instruction: %vp_nxv128i8_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i64.nxv128i8(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv128i16_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.zext.nxv128i32.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %vp_nxv128i16_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i64.nxv128i16(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %vp_nxv128i32_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i64.nxv128i32(<vscale x 128 x i32> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_nxv128i1_nxv128i8 = call <vscale x 128 x i8> @llvm.vp.zext.nxv128i8.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vp_nxv128i1_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.zext.nxv128i16.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %vp_nxv128i1_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.zext.nxv128i32.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 270 for instruction: %vp_nxv128i1_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i64.nxv128i1(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2i8_v2i16 = zext <2 x i8> undef to <2 x i16>
@@ -846,6 +1664,17 @@ define void @zext() {
   %v2i1_v2i32 = zext <2 x i1> undef to <2 x i32>
   %v2i1_v2i64 = zext <2 x i1> undef to <2 x i64>
 
+  %vp_v2i8_v2i16 = call <2 x i16> @llvm.vp.zext.v2i8.v2i16(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i8_v2i32 = call <2 x i32> @llvm.vp.zext.v2i8.v2i32(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i8_v2i64 = call <2 x i64> @llvm.vp.zext.v2i8.v2i64(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i16_v2i32 = call <2 x i32> @llvm.vp.zext.v2i16.v2i32(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i16_v2i64 = call <2 x i64> @llvm.vp.zext.v2i16.v2i64(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i32_v2i64 = call <2 x i64> @llvm.vp.zext.v2i32.v2i64(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i1_v2i8 = call <2 x i8> @llvm.vp.zext.v2i1.v2i8(<2 x i1> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i1_v2i16 = call <2 x i16> @llvm.vp.zext.v2i1.v2i16(<2 x i1> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i1_v2i32 = call <2 x i32> @llvm.vp.zext.v2i1.v2i32(<2 x i1> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i1_v2i64 = call <2 x i64> @llvm.vp.zext.v2i1.v2i64(<2 x i1> undef, <2 x i1> undef, i32 undef)
+
   %v4i8_v4i16 = zext <4 x i8> undef to <4 x i16>
   %v4i8_v4i32 = zext <4 x i8> undef to <4 x i32>
   %v4i8_v4i64 = zext <4 x i8> undef to <4 x i64>
@@ -857,6 +1686,17 @@ define void @zext() {
   %v4i1_v4i32 = zext <4 x i1> undef to <4 x i32>
   %v4i1_v4i64 = zext <4 x i1> undef to <4 x i64>
 
+  %vp_v4i8_v4i16 = call <4 x i16> @llvm.vp.zext.v4i8.v4i16(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i8_v4i32 = call <4 x i32> @llvm.vp.zext.v4i8.v4i32(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i8_v4i64 = call <4 x i64> @llvm.vp.zext.v4i8.v4i64(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i16_v4i32 = call <4 x i32> @llvm.vp.zext.v4i16.v4i32(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i16_v4i64 = call <4 x i64> @llvm.vp.zext.v4i16.v4i64(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i32_v4i64 = call <4 x i64> @llvm.vp.zext.v4i32.v4i64(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i1_v4i8 = call <4 x i8> @llvm.vp.zext.v4i1.v4i8(<4 x i1> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i1_v4i16 = call <4 x i16> @llvm.vp.zext.v4i1.v4i16(<4 x i1> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i1_v4i32 = call <4 x i32> @llvm.vp.zext.v4i1.v4i32(<4 x i1> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i1_v4i64 = call <4 x i64> @llvm.vp.zext.v4i1.v4i64(<4 x i1> undef, <4 x i1> undef, i32 undef)
+
   %v8i8_v8i16 = zext <8 x i8> undef to <8 x i16>
   %v8i8_v8i32 = zext <8 x i8> undef to <8 x i32>
   %v8i8_v8i64 = zext <8 x i8> undef to <8 x i64>
@@ -868,6 +1708,17 @@ define void @zext() {
   %v8i1_v8i32 = zext <8 x i1> undef to <8 x i32>
   %v8i1_v8i64 = zext <8 x i1> undef to <8 x i64>
 
+  %vp_v8i8_v8i16 = call <8 x i16> @llvm.vp.zext.v8i8.v8i16(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i8_v8i32 = call <8 x i32> @llvm.vp.zext.v8i8.v8i32(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i8_v8i64 = call <8 x i64> @llvm.vp.zext.v8i8.v8i64(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i16_v8i32 = call <8 x i32> @llvm.vp.zext.v8i16.v8i32(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i16_v8i64 = call <8 x i64> @llvm.vp.zext.v8i16.v8i64(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i32_v8i64 = call <8 x i64> @llvm.vp.zext.v8i32.v8i64(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i1_v8i8 = call <8 x i8> @llvm.vp.zext.v8i1.v8i8(<8 x i1> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i1_v8i16 = call <8 x i16> @llvm.vp.zext.v8i1.v8i16(<8 x i1> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i1_v8i32 = call <8 x i32> @llvm.vp.zext.v8i1.v8i32(<8 x i1> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i1_v8i64 = call <8 x i64> @llvm.vp.zext.v8i1.v8i64(<8 x i1> undef, <8 x i1> undef, i32 undef)
+
   %v16i8_v16i16 = zext <16 x i8> undef to <16 x i16>
   %v16i8_v16i32 = zext <16 x i8> undef to <16 x i32>
   %v16i8_v16i64 = zext <16 x i8> undef to <16 x i64>
@@ -879,6 +1730,17 @@ define void @zext() {
   %v16i1_v16i32 = zext <16 x i1> undef to <16 x i32>
   %v16i1_v16i64 = zext <16 x i1> undef to <16 x i64>
 
+  %vp_v16i8_v16i16 = call <16 x i16> @llvm.vp.zext.v16i8.v16i16(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i8_v16i32 = call <16 x i32> @llvm.vp.zext.v16i8.v16i32(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i8_v16i64 = call <16 x i64> @llvm.vp.zext.v16i8.v16i64(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i16_v16i32 = call <16 x i32> @llvm.vp.zext.v16i16.v16i32(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i16_v16i64 = call <16 x i64> @llvm.vp.zext.v16i16.v16i64(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i32_v16i64 = call <16 x i64> @llvm.vp.zext.v16i32.v16i64(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i1_v16i8 = call <16 x i8> @llvm.vp.zext.v16i1.v16i8(<16 x i1> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i1_v16i16 = call <16 x i16> @llvm.vp.zext.v16i1.v16i16(<16 x i1> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i1_v16i32 = call <16 x i32> @llvm.vp.zext.v16i1.v16i32(<16 x i1> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i1_v16i64 = call <16 x i64> @llvm.vp.zext.v16i1.v16i64(<16 x i1> undef, <16 x i1> undef, i32 undef)
+
   %v32i8_v32i16 = zext <32 x i8> undef to <32 x i16>
   %v32i8_v32i32 = zext <32 x i8> undef to <32 x i32>
   %v32i8_v32i64 = zext <32 x i8> undef to <32 x i64>
@@ -890,6 +1752,17 @@ define void @zext() {
   %v32i1_v32i32 = zext <32 x i1> undef to <32 x i32>
   %v32i1_v32i64 = zext <32 x i1> undef to <32 x i64>
 
+  %vp_v32i8_v32i16 = call <32 x i16> @llvm.vp.zext.v32i8.v32i16(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i8_v32i32 = call <32 x i32> @llvm.vp.zext.v32i8.v32i32(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i8_v32i64 = call <32 x i64> @llvm.vp.zext.v32i8.v32i64(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i16_v32i32 = call <32 x i32> @llvm.vp.zext.v32i16.v32i32(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i16_v32i64 = call <32 x i64> @llvm.vp.zext.v32i16.v32i64(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i32_v32i64 = call <32 x i64> @llvm.vp.zext.v32i32.v32i64(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i1_v32i8 = call <32 x i8> @llvm.vp.zext.v32i1.v32i8(<32 x i1> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i1_v32i16 = call <32 x i16> @llvm.vp.zext.v32i1.v32i16(<32 x i1> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i1_v32i32 = call <32 x i32> @llvm.vp.zext.v32i1.v32i32(<32 x i1> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i1_v32i64 = call <32 x i64> @llvm.vp.zext.v32i1.v32i64(<32 x i1> undef, <32 x i1> undef, i32 undef)
+
   %v64i8_v64i16 = zext <64 x i8> undef to <64 x i16>
   %v64i8_v64i32 = zext <64 x i8> undef to <64 x i32>
   %v64i8_v64i64 = zext <64 x i8> undef to <64 x i64>
@@ -901,6 +1774,17 @@ define void @zext() {
   %v64i1_v64i32 = zext <64 x i1> undef to <64 x i32>
   %v64i1_v64i64 = zext <64 x i1> undef to <64 x i64>
 
+  %vp_v64i8_v64i16 = call <64 x i16> @llvm.vp.zext.v64i8.v64i16(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i8_v64i32 = call <64 x i32> @llvm.vp.zext.v64i8.v64i32(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i8_v64i64 = call <64 x i64> @llvm.vp.zext.v64i8.v64i64(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i16_v64i32 = call <64 x i32> @llvm.vp.zext.v64i16.v64i32(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i16_v64i64 = call <64 x i64> @llvm.vp.zext.v64i16.v64i64(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i32_v64i64 = call <64 x i64> @llvm.vp.zext.v64i32.v64i64(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i1_v64i8 = call <64 x i8> @llvm.vp.zext.v64i1.v64i8(<64 x i1> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i1_v64i16 = call <64 x i16> @llvm.vp.zext.v64i1.v64i16(<64 x i1> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i1_v64i32 = call <64 x i32> @llvm.vp.zext.v64i1.v64i32(<64 x i1> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i1_v64i64 = call <64 x i64> @llvm.vp.zext.v64i1.v64i64(<64 x i1> undef, <64 x i1> undef, i32 undef)
+
   %v128i8_v128i16 = zext <128 x i8> undef to <128 x i16>
   %v128i8_v128i32 = zext <128 x i8> undef to <128 x i32>
   %v128i8_v128i64 = zext <128 x i8> undef to <128 x i64>
@@ -912,6 +1796,17 @@ define void @zext() {
   %v128i1_v128i32 = zext <128 x i1> undef to <128 x i32>
   %v128i1_v128i64 = zext <128 x i1> undef to <128 x i64>
 
+  %vp_v128i8_v128i16 = call <128 x i16> @llvm.vp.zext.v128i8.v128i16(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i8_v128i32 = call <128 x i32> @llvm.vp.zext.v128i8.v128i32(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i8_v128i64 = call <128 x i64> @llvm.vp.zext.v128i8.v128i64(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i16_v128i32 = call <128 x i32> @llvm.vp.zext.v128i16.v128i32(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i16_v128i64 = call <128 x i64> @llvm.vp.zext.v128i16.v128i64(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i32_v128i64 = call <128 x i64> @llvm.vp.zext.v128i32.v128i64(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i1_v128i8 = call <128 x i8> @llvm.vp.zext.v128i1.v128i8(<128 x i1> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i1_v128i16 = call <128 x i16> @llvm.vp.zext.v128i1.v128i16(<128 x i1> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i1_v128i32 = call <128 x i32> @llvm.vp.zext.v128i1.v128i32(<128 x i1> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i1_v128i64 = call <128 x i64> @llvm.vp.zext.v128i1.v128i64(<128 x i1> undef, <128 x i1> undef, i32 undef)
+
   %v256i8_v256i16 = zext <256 x i8> undef to <256 x i16>
   %v256i8_v256i32 = zext <256 x i8> undef to <256 x i32>
   %v256i8_v256i64 = zext <256 x i8> undef to <256 x i64>
@@ -923,6 +1818,17 @@ define void @zext() {
   %v256i1_v256i32 = zext <256 x i1> undef to <256 x i32>
   %v256i1_v256i64 = zext <256 x i1> undef to <256 x i64>
 
+  %vp_v256i8_v256i16 = call <256 x i16> @llvm.vp.zext.v256i8.v256i16(<256 x i8> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i8_v256i32 = call <256 x i32> @llvm.vp.zext.v256i8.v256i32(<256 x i8> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i8_v256i64 = call <256 x i64> @llvm.vp.zext.v256i8.v256i64(<256 x i8> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i16_v256i32 = call <256 x i32> @llvm.vp.zext.v256i16.v256i32(<256 x i16> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i16_v256i64 = call <256 x i64> @llvm.vp.zext.v256i16.v256i64(<256 x i16> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i32_v256i64 = call <256 x i64> @llvm.vp.zext.v256i32.v256i64(<256 x i32> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i1_v256i8 = call <256 x i8> @llvm.vp.zext.v256i1.v256i8(<256 x i1> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i1_v256i16 = call <256 x i16> @llvm.vp.zext.v256i1.v256i16(<256 x i1> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i1_v256i32 = call <256 x i32> @llvm.vp.zext.v256i1.v256i32(<256 x i1> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i1_v256i64 = call <256 x i64> @llvm.vp.zext.v256i1.v256i64(<256 x i1> undef, <256 x i1> undef, i32 undef)
+
   %nxv1i8_nxv1i16 = zext <vscale x 1 x i8> undef to <vscale x 1 x i16>
   %nxv1i8_nxv1i32 = zext <vscale x 1 x i8> undef to <vscale x 1 x i32>
   %nxv1i8_nxv1i64 = zext <vscale x 1 x i8> undef to <vscale x 1 x i64>
@@ -934,6 +1840,17 @@ define void @zext() {
   %nxv1i1_nxv1i32 = zext <vscale x 1 x i1> undef to <vscale x 1 x i32>
   %nxv1i1_nxv1i64 = zext <vscale x 1 x i1> undef to <vscale x 1 x i64>
 
+  %vp_nxv1i8_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i8.nxv1i16(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i8_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i8.nxv1i32(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i8_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i8.nxv1i64(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i16.nxv1i32(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i16.nxv1i64(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i32.nxv1i64(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i1_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.zext.nxv1i1.nxv1i8(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i1_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i1.nxv1i16(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i1_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i1.nxv1i32(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i1_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i1.nxv1i64(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+
   %nxv2i8_nxv2i16 = zext <vscale x 2 x i8> undef to <vscale x 2 x i16>
   %nxv2i8_nxv2i32 = zext <vscale x 2 x i8> undef to <vscale x 2 x i32>
   %nxv2i8_nxv2i64 = zext <vscale x 2 x i8> undef to <vscale x 2 x i64>
@@ -945,6 +1862,17 @@ define void @zext() {
   %nxv2i1_nxv2i32 = zext <vscale x 2 x i1> undef to <vscale x 2 x i32>
   %nxv2i1_nxv2i64 = zext <vscale x 2 x i1> undef to <vscale x 2 x i64>
 
+  %vp_nxv2i8_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i8.nxv2i16(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i8_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i8.nxv2i32(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i8_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i8.nxv2i64(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i16.nxv2i32(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i16.nxv2i64(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i1_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.zext.nxv2i1.nxv2i8(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i1_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.zext.nxv2i1.nxv2i16(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i1_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i1.nxv2i32(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i1_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i1.nxv2i64(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+
   %nxv4i8_nxv4i16 = zext <vscale x 4 x i8> undef to <vscale x 4 x i16>
   %nxv4i8_nxv4i32 = zext <vscale x 4 x i8> undef to <vscale x 4 x i32>
   %nxv4i8_nxv4i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64>
@@ -956,6 +1884,17 @@ define void @zext() {
   %nxv4i1_nxv4i32 = zext <vscale x 4 x i1> undef to <vscale x 4 x i32>
   %nxv4i1_nxv4i64 = zext <vscale x 4 x i1> undef to <vscale x 4 x i64>
 
+  %vp_nxv4i8_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.zext.nxv4i8.nxv4i16(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i8_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.zext.nxv4i8.nxv4i32(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i8_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i8.nxv4i64(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.zext.nxv4i16.nxv4i32(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i16.nxv4i64(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i32.nxv4i64(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i1_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.zext.nxv4i1.nxv4i8(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i1_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.zext.nxv4i1.nxv4i16(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i1_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.zext.nxv4i1.nxv4i32(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i1_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i1.nxv4i64(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+
   %nxv8i8_nxv8i16 = zext <vscale x 8 x i8> undef to <vscale x 8 x i16>
   %nxv8i8_nxv8i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32>
   %nxv8i8_nxv8i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64>
@@ -967,6 +1906,17 @@ define void @zext() {
   %nxv8i1_nxv8i32 = zext <vscale x 8 x i1> undef to <vscale x 8 x i32>
   %nxv8i1_nxv8i64 = zext <vscale x 8 x i1> undef to <vscale x 8 x i64>
 
+  %vp_nxv8i8_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.zext.nxv8i8.nxv8i16(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i8_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i8.nxv8i32(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i8_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i8.nxv8i64(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i16.nxv8i64(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i1_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.zext.nxv8i1.nxv8i8(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i1_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.zext.nxv8i1.nxv8i16(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i1_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i1.nxv8i32(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i1_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i1.nxv8i64(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+
   %nxv16i8_nxv16i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
   %nxv16i8_nxv16i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
   %nxv16i8_nxv16i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
@@ -978,6 +1928,17 @@ define void @zext() {
   %nxv16i1_nxv16i32 = zext <vscale x 16 x i1> undef to <vscale x 16 x i32>
   %nxv16i1_nxv16i64 = zext <vscale x 16 x i1> undef to <vscale x 16 x i64>
 
+  %vp_nxv16i8_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.zext.nxv16i8.nxv16i16(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i8_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.zext.nxv16i8.nxv16i32(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i8_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i8.nxv16i64(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.zext.nxv16i16.nxv16i32(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i16.nxv16i64(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i32.nxv16i64(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i1_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.zext.nxv16i1.nxv16i8(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i1_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.zext.nxv16i1.nxv16i16(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i1_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.zext.nxv16i1.nxv16i32(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i1_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.zext.nxv16i1.nxv16i64(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+
   %nxv32i8_nxv32i16 = zext <vscale x 32 x i8> undef to <vscale x 32 x i16>
   %nxv32i8_nxv32i32 = zext <vscale x 32 x i8> undef to <vscale x 32 x i32>
   %nxv32i8_nxv32i64 = zext <vscale x 32 x i8> undef to <vscale x 32 x i64>
@@ -989,6 +1950,17 @@ define void @zext() {
   %nxv32i1_nxv32i32 = zext <vscale x 32 x i1> undef to <vscale x 32 x i32>
   %nxv32i1_nxv32i64 = zext <vscale x 32 x i1> undef to <vscale x 32 x i64>
 
+  %vp_nxv32i8_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.zext.nxv32i8.nxv32i16(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i8_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i8.nxv32i32(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i8_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i8.nxv32i64(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i16.nxv32i32(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i16.nxv32i64(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i32.nxv32i64(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i1_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.zext.nxv32i1.nxv32i8(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i1_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.zext.nxv32i1.nxv32i16(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i1_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.zext.nxv32i1.nxv32i32(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i1_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.zext.nxv32i1.nxv32i64(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+
   %nxv64i8_nxv64i16 = zext <vscale x 64 x i8> undef to <vscale x 64 x i16>
   %nxv64i8_nxv64i32 = zext <vscale x 64 x i8> undef to <vscale x 64 x i32>
   %nxv64i8_nxv64i64 = zext <vscale x 64 x i8> undef to <vscale x 64 x i64>
@@ -1000,6 +1972,17 @@ define void @zext() {
   %nxv64i1_nxv64i32 = zext <vscale x 64 x i1> undef to <vscale x 64 x i32>
   %nxv64i1_nxv64i64 = zext <vscale x 64 x i1> undef to <vscale x 64 x i64>
 
+  %vp_nxv64i8_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.zext.nxv64i8.nxv64i16(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i8_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.zext.nxv64i8.nxv64i32(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i8_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i8.nxv64i64(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.zext.nxv64i16.nxv64i32(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i16.nxv64i64(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i32.nxv64i64(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i1_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.zext.nxv64i1.nxv64i8(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i1_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.zext.nxv64i1.nxv64i16(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i1_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.zext.nxv64i1.nxv64i32(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i1_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.zext.nxv64i1.nxv64i64(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+
   %nxv128i8_nxv128i16 = zext <vscale x 128 x i8> undef to <vscale x 128 x i16>
   %nxv128i8_nxv128i32 = zext <vscale x 128 x i8> undef to <vscale x 128 x i32>
   %nxv128i8_nxv128i128 = zext <vscale x 128 x i8> undef to <vscale x 128 x i128>
@@ -1011,6 +1994,17 @@ define void @zext() {
   %nxv128i1_nxv128i32 = zext <vscale x 128 x i1> undef to <vscale x 128 x i32>
   %nxv128i1_nxv128i128 = zext <vscale x 128 x i1> undef to <vscale x 128 x i128>
 
+  %vp_nxv128i8_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.zext.nxv128i8.nxv128i16(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i8_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.zext.nxv128i8.nxv128i32(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i8_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i8.nxv128i64(<vscale x 128 x i8> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i16_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.zext.nxv128i16.nxv128i32(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i16_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i16.nxv128i64(<vscale x 128 x i16> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i32_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i32.nxv128i64(<vscale x 128 x i32> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i1_nxv128i8 = call <vscale x 128 x i8> @llvm.vp.zext.nxv128i1.nxv128i8(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i1_nxv128i16 = call <vscale x 128 x i16> @llvm.vp.zext.nxv128i1.nxv128i16(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i1_nxv128i32 = call <vscale x 128 x i32> @llvm.vp.zext.nxv128i1.nxv128i32(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+  %vp_nxv128i1_nxv128i64 = call <vscale x 128 x i64> @llvm.vp.zext.nxv128i1.nxv128i64(<vscale x 128 x i1> undef, <vscale x 128 x i1> undef, i32 undef)
+
   ret void
 }
 
@@ -1021,6 +2015,11 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2i6 = trunc <2 x i16> undef to <2 x i6>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i4_v2i2 = trunc <2 x i4> undef to <2 x i2>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i6_v2i4 = trunc <2 x i6> undef to <2 x i4>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i2 = call <2 x i2> @llvm.vp.trunc.v2i2.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i4 = call <2 x i4> @llvm.vp.trunc.v2i4.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i6 = call <2 x i6> @llvm.vp.trunc.v2i6.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %vp_v2i4_v2i2 = call <2 x i2> @llvm.vp.trunc.v2i2.v2i4(<2 x i4> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %vp_v2i6_v2i4 = call <2 x i4> @llvm.vp.trunc.v2i4.v2i6(<2 x i6> undef, <2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2i8 = trunc <2 x i16> undef to <2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_v2i8 = trunc <2 x i32> undef to <2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_v2i8 = trunc <2 x i64> undef to <2 x i8>
@@ -1031,6 +2030,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_v2i1 = trunc <2 x i16> undef to <2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_v2i1 = trunc <2 x i32> undef to <2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2i1 = trunc <2 x i64> undef to <2 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i8 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i32_v2i8 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2i64_v2i8 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i32_v2i16 = call <2 x i16> @llvm.vp.trunc.v2i16.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i64_v2i16 = call <2 x i16> @llvm.vp.trunc.v2i16.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i64_v2i32 = call <2 x i32> @llvm.vp.trunc.v2i32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i8_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i1.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i16_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i1.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i32_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i1.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i64_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i1.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i8 = trunc <4 x i16> undef to <4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i8 = trunc <4 x i32> undef to <4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_v4i8 = trunc <4 x i64> undef to <4 x i8>
@@ -1041,6 +2050,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i1 = trunc <4 x i16> undef to <4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i1 = trunc <4 x i32> undef to <4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i16_v4i8 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i32_v4i8 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4i64_v4i8 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i32_v4i16 = call <4 x i16> @llvm.vp.trunc.v4i16.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i64_v4i16 = call <4 x i16> @llvm.vp.trunc.v4i16.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i64_v4i32 = call <4 x i32> @llvm.vp.trunc.v4i32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i8_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i1.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i16_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i1.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i32_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i1.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4i64_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i1.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i8 = trunc <8 x i16> undef to <8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8i8 = trunc <8 x i32> undef to <8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8>
@@ -1051,6 +2070,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i1 = trunc <8 x i16> undef to <8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i1 = trunc <8 x i32> undef to <8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8i16_v8i8 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i32_v8i8 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i64_v8i8 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8i32_v8i16 = call <8 x i16> @llvm.vp.trunc.v8i16.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8i64_v8i16 = call <8 x i16> @llvm.vp.trunc.v8i16.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i64_v8i32 = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i8_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i1.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i16_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i1.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i32_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i1.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8i64_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i1.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i8 = trunc <2 x i16> undef to <2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i8 = trunc <2 x i32> undef to <2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i64_v16i8 = trunc <2 x i64> undef to <2 x i8>
@@ -1061,6 +2090,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16_v16i1 = trunc <2 x i16> undef to <2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i1 = trunc <2 x i32> undef to <2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16i1 = trunc <2 x i64> undef to <2 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16i16_v16i8 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16i32_v16i8 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v16i64_v16i8 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i32_v16i16 = call <16 x i16> @llvm.vp.trunc.v16i16.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16i64_v16i16 = call <16 x i16> @llvm.vp.trunc.v16i16.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i64_v16i32 = call <16 x i32> @llvm.vp.trunc.v16i32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i8_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i1.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i16_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i1.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i32_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i1.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16i64_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i1.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i8 = trunc <16 x i16> undef to <16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8>
@@ -1071,6 +2110,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i16_v32i1 = trunc <16 x i16> undef to <16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32i16_v32i8 = call <32 x i8> @llvm.vp.trunc.v32i8.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v32i32_v32i8 = call <32 x i8> @llvm.vp.trunc.v32i8.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v32i64_v32i8 = call <32 x i8> @llvm.vp.trunc.v32i8.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i32_v32i16 = call <32 x i16> @llvm.vp.trunc.v32i16.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v32i64_v32i16 = call <32 x i16> @llvm.vp.trunc.v32i16.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32i64_v32i32 = call <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i8_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i1.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i16_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i1.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32i32_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i1.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %vp_v32i64_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i1.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8>
@@ -1081,6 +2130,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64i16_v64i8 = call <64 x i8> @llvm.vp.trunc.v64i8.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v64i32_v64i8 = call <64 x i8> @llvm.vp.trunc.v64i8.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v64i64_v64i8 = call <64 x i8> @llvm.vp.trunc.v64i8.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v64i32_v64i16 = call <64 x i16> @llvm.vp.trunc.v64i16.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v64i64_v64i16 = call <64 x i16> @llvm.vp.trunc.v64i16.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64i64_v64i32 = call <64 x i32> @llvm.vp.trunc.v64i32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v64i8_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i1.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v64i16_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i1.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v64i32_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i1.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64i64_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i1.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8>
@@ -1091,6 +2150,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v128i16_v128i8 = call <128 x i8> @llvm.vp.trunc.v128i8.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v128i32_v128i8 = call <128 x i8> @llvm.vp.trunc.v128i8.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v128i64_v128i8 = call <128 x i8> @llvm.vp.trunc.v128i8.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v128i32_v128i16 = call <128 x i16> @llvm.vp.trunc.v128i16.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v128i64_v128i16 = call <128 x i16> @llvm.vp.trunc.v128i16.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128i64_v128i32 = call <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v128i8_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i1.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v128i16_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i1.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_v128i32_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i1.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v128i64_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i1.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8>
@@ -1101,6 +2170,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v256i16_v256i8 = call <256 x i8> @llvm.vp.trunc.v256i8.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v256i32_v256i8 = call <256 x i8> @llvm.vp.trunc.v256i8.v256i32(<256 x i32> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %vp_v256i64_v256i8 = call <256 x i8> @llvm.vp.trunc.v256i8.v256i64(<256 x i64> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v256i32_v256i16 = call <256 x i16> @llvm.vp.trunc.v256i16.v256i32(<256 x i32> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %vp_v256i64_v256i16 = call <256 x i16> @llvm.vp.trunc.v256i16.v256i64(<256 x i64> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %vp_v256i64_v256i32 = call <256 x i32> @llvm.vp.trunc.v256i32.v256i64(<256 x i64> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v256i8_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i1.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vp_v256i16_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i1.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %vp_v256i32_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i1.v256i32(<256 x i32> undef, <256 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v256i64_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i1.v256i64(<256 x i64> undef, <256 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1i8 = trunc <vscale x 1 x i16> undef to <vscale x 1 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i32_nxv1i8 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i64_nxv1i8 = trunc <vscale x 1 x i64> undef to <vscale x 1 x i8>
@@ -1111,6 +2190,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i16_nxv1i1 = trunc <vscale x 1 x i16> undef to <vscale x 1 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i32_nxv1i1 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1i1 = trunc <vscale x 1 x i64> undef to <vscale x 1 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i16_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1i64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i8_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i1.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i16_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i1.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i1.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i1.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i8 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i64_nxv2i8 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i8>
@@ -1121,6 +2210,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i64_nxv2i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i16_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2i64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i8_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i16_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv2i64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i8 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4i8 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64_nxv4i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
@@ -1131,6 +2230,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i64_nxv4i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4i16_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4i32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.trunc.nxv4i16.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4i64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.trunc.nxv4i16.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i8_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i1.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i16_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i1.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i1.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv4i64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i1.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i8 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i32_nxv8i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8i64_nxv8i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
@@ -1141,6 +2250,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i64_nxv8i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8i16_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8i32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv8i64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i16.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8i64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i16.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.trunc.nxv8i32.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i8_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i1.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i16_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i1.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i1.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv8i64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i1.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_nxv16i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv16i32_nxv16i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv16i64_nxv16i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
@@ -1151,6 +2270,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i1 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i32_nxv16i1 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i64_nxv16i1 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16i16_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.trunc.nxv16i8.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv16i32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.trunc.nxv16i8.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv16i64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.trunc.nxv16i8.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.trunc.nxv16i16.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv16i64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.trunc.nxv16i16.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16i64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.trunc.nxv16i32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i8_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i1.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i16_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i1.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv16i32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i1.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv16i64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i1.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32i16_nxv32i8 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32i32_nxv32i8 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv32i64_nxv32i8 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i8>
@@ -1161,6 +2290,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv32i16_nxv32i1 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i32_nxv32i1 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i64_nxv32i1 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32i16_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.trunc.nxv32i8.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv32i32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.trunc.nxv32i8.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv32i64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.trunc.nxv32i8.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv32i32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.trunc.nxv32i16.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv32i64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.trunc.nxv32i16.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32i64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.trunc.nxv32i32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv32i8_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i1.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv32i16_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i1.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv32i32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i1.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv32i64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i1.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv64i16_nxv64i8 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv64i32_nxv64i8 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i8>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i64_nxv64i8 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i8>
@@ -1171,6 +2310,16 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i16_nxv64i1 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i32_nxv64i1 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i1>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i64_nxv64i1 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv64i16_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.trunc.nxv64i8.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv64i32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.trunc.nxv64i8.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64i64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.trunc.nxv64i8.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv64i32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.trunc.nxv64i16.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %vp_nxv64i64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.trunc.nxv64i16.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %vp_nxv64i64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.trunc.nxv64i32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv64i8_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i1.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv64i16_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i1.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv64i32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i1.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %vp_nxv64i64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i1.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64-LABEL: 'trunc'
@@ -1179,6 +2328,11 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2i6 = trunc <2 x i16> undef to <2 x i6>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i4_v2i2 = trunc <2 x i4> undef to <2 x i2>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i6_v2i4 = trunc <2 x i6> undef to <2 x i4>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i2 = call <2 x i2> @llvm.vp.trunc.v2i2.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i4 = call <2 x i4> @llvm.vp.trunc.v2i4.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i6 = call <2 x i6> @llvm.vp.trunc.v2i6.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %vp_v2i4_v2i2 = call <2 x i2> @llvm.vp.trunc.v2i2.v2i4(<2 x i4> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %vp_v2i6_v2i4 = call <2 x i4> @llvm.vp.trunc.v2i4.v2i6(<2 x i6> undef, <2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2i8 = trunc <2 x i16> undef to <2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_v2i8 = trunc <2 x i32> undef to <2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i64_v2i8 = trunc <2 x i64> undef to <2 x i8>
@@ -1189,6 +2343,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_v2i1 = trunc <2 x i16> undef to <2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_v2i1 = trunc <2 x i32> undef to <2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2i1 = trunc <2 x i64> undef to <2 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i16_v2i8 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i32_v2i8 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2i64_v2i8 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i32_v2i16 = call <2 x i16> @llvm.vp.trunc.v2i16.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i64_v2i16 = call <2 x i16> @llvm.vp.trunc.v2i16.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2i64_v2i32 = call <2 x i32> @llvm.vp.trunc.v2i32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i8_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i1.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i16_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i1.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i32_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i1.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2i64_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i1.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4i8 = trunc <4 x i16> undef to <4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i8 = trunc <4 x i32> undef to <4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i64_v4i8 = trunc <4 x i64> undef to <4 x i8>
@@ -1199,6 +2363,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i1 = trunc <4 x i16> undef to <4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i1 = trunc <4 x i32> undef to <4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i16_v4i8 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i32_v4i8 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4i64_v4i8 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i32_v4i16 = call <4 x i16> @llvm.vp.trunc.v4i16.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i64_v4i16 = call <4 x i16> @llvm.vp.trunc.v4i16.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4i64_v4i32 = call <4 x i32> @llvm.vp.trunc.v4i32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i8_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i1.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i16_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i1.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4i32_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i1.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v4i64_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i1.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i8 = trunc <8 x i16> undef to <8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8i8 = trunc <8 x i32> undef to <8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8>
@@ -1209,6 +2383,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i1 = trunc <8 x i16> undef to <8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i1 = trunc <8 x i32> undef to <8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8i16_v8i8 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i32_v8i8 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i64_v8i8 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8i32_v8i16 = call <8 x i16> @llvm.vp.trunc.v8i16.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8i64_v8i16 = call <8 x i16> @llvm.vp.trunc.v8i16.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i64_v8i32 = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i8_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i1.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8i16_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i1.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8i32_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i1.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v8i64_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i1.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i8 = trunc <2 x i16> undef to <2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i8 = trunc <2 x i32> undef to <2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i64_v16i8 = trunc <2 x i64> undef to <2 x i8>
@@ -1219,6 +2403,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16_v16i1 = trunc <2 x i16> undef to <2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i1 = trunc <2 x i32> undef to <2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16i1 = trunc <2 x i64> undef to <2 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16i16_v16i8 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16i32_v16i8 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v16i64_v16i8 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i32_v16i16 = call <16 x i16> @llvm.vp.trunc.v16i16.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16i64_v16i16 = call <16 x i16> @llvm.vp.trunc.v16i16.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i64_v16i32 = call <16 x i32> @llvm.vp.trunc.v16i32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16i8_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i1.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16i16_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i1.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16i32_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i1.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v16i64_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i1.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i8 = trunc <16 x i16> undef to <16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8>
@@ -1229,6 +2423,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i16_v32i1 = trunc <16 x i16> undef to <16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32i16_v32i8 = call <32 x i8> @llvm.vp.trunc.v32i8.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v32i32_v32i8 = call <32 x i8> @llvm.vp.trunc.v32i8.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v32i64_v32i8 = call <32 x i8> @llvm.vp.trunc.v32i8.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i32_v32i16 = call <32 x i16> @llvm.vp.trunc.v32i16.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v32i64_v32i16 = call <32 x i16> @llvm.vp.trunc.v32i16.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32i64_v32i32 = call <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32i8_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i1.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32i16_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i1.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v32i32_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i1.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %vp_v32i64_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i1.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8>
@@ -1239,6 +2443,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64i16_v64i8 = call <64 x i8> @llvm.vp.trunc.v64i8.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v64i32_v64i8 = call <64 x i8> @llvm.vp.trunc.v64i8.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v64i64_v64i8 = call <64 x i8> @llvm.vp.trunc.v64i8.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v64i32_v64i16 = call <64 x i16> @llvm.vp.trunc.v64i16.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v64i64_v64i16 = call <64 x i16> @llvm.vp.trunc.v64i16.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64i64_v64i32 = call <64 x i32> @llvm.vp.trunc.v64i32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v64i8_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i1.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v64i16_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i1.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %vp_v64i32_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i1.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %vp_v64i64_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i1.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8>
@@ -1249,6 +2463,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v128i16_v128i8 = call <128 x i8> @llvm.vp.trunc.v128i8.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v128i32_v128i8 = call <128 x i8> @llvm.vp.trunc.v128i8.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_v128i64_v128i8 = call <128 x i8> @llvm.vp.trunc.v128i8.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v128i32_v128i16 = call <128 x i16> @llvm.vp.trunc.v128i16.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v128i64_v128i16 = call <128 x i16> @llvm.vp.trunc.v128i16.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128i64_v128i32 = call <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_v128i8_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i1.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_v128i16_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i1.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v128i32_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i1.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v128i64_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i1.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8>
@@ -1259,6 +2483,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v256i16_v256i8 = call <256 x i8> @llvm.vp.trunc.v256i8.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v256i32_v256i8 = call <256 x i8> @llvm.vp.trunc.v256i8.v256i32(<256 x i32> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %vp_v256i64_v256i8 = call <256 x i8> @llvm.vp.trunc.v256i8.v256i64(<256 x i64> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v256i32_v256i16 = call <256 x i16> @llvm.vp.trunc.v256i16.v256i32(<256 x i32> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %vp_v256i64_v256i16 = call <256 x i16> @llvm.vp.trunc.v256i16.v256i64(<256 x i64> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %vp_v256i64_v256i32 = call <256 x i32> @llvm.vp.trunc.v256i32.v256i64(<256 x i64> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %vp_v256i8_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i1.v256i8(<256 x i8> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %vp_v256i16_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i1.v256i16(<256 x i16> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v256i32_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i1.v256i32(<256 x i32> undef, <256 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v256i64_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i1.v256i64(<256 x i64> undef, <256 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1i8 = trunc <vscale x 1 x i16> undef to <vscale x 1 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i32_nxv1i8 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i64_nxv1i8 = trunc <vscale x 1 x i64> undef to <vscale x 1 x i8>
@@ -1269,6 +2503,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i16_nxv1i1 = trunc <vscale x 1 x i16> undef to <vscale x 1 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i32_nxv1i1 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1i1 = trunc <vscale x 1 x i64> undef to <vscale x 1 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i16_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1i64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1i64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i8_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i1.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i16_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i1.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i1.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1i64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i1.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2i8 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i64_nxv2i8 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i8>
@@ -1279,6 +2523,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i64_nxv2i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i16_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2i64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2i64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i8_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i16_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2i32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv2i64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i8 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4i8 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64_nxv4i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
@@ -1289,6 +2543,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i64_nxv4i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4i16_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4i32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.trunc.nxv4i16.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4i64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.trunc.nxv4i16.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i8_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i1.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4i16_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i1.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4i32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i1.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv4i64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i1.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i8 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i32_nxv8i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8i64_nxv8i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
@@ -1299,6 +2563,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i64_nxv8i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8i16_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8i32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv8i64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i16.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8i64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i16.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.trunc.nxv8i32.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8i8_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i1.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8i16_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i1.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8i32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i1.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv8i64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i1.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_nxv16i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv16i32_nxv16i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv16i64_nxv16i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
@@ -1309,6 +2583,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i1 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i32_nxv16i1 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i64_nxv16i1 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16i16_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.trunc.nxv16i8.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv16i32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.trunc.nxv16i8.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv16i64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.trunc.nxv16i8.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.trunc.nxv16i16.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv16i64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.trunc.nxv16i16.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16i64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.trunc.nxv16i32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16i8_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i1.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16i16_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i1.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv16i32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i1.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv16i64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i1.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32i16_nxv32i8 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32i32_nxv32i8 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv32i64_nxv32i8 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i8>
@@ -1319,6 +2603,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv32i16_nxv32i1 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i32_nxv32i1 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i64_nxv32i1 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32i16_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.trunc.nxv32i8.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv32i32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.trunc.nxv32i8.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv32i64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.trunc.nxv32i8.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv32i32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.trunc.nxv32i16.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv32i64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.trunc.nxv32i16.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32i64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.trunc.nxv32i32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv32i8_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i1.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv32i16_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i1.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv32i32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i1.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv32i64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i1.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv64i16_nxv64i8 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv64i32_nxv64i8 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %nxv64i64_nxv64i8 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i8>
@@ -1329,6 +2623,16 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i16_nxv64i1 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i32_nxv64i1 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %nxv64i64_nxv64i1 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv64i16_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.trunc.nxv64i8.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv64i32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.trunc.nxv64i8.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %vp_nxv64i64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.trunc.nxv64i8.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv64i32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.trunc.nxv64i16.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_nxv64i64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.trunc.nxv64i16.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64i64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.trunc.nxv64i32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %vp_nxv64i8_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i1.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %vp_nxv64i16_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i1.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %vp_nxv64i32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i1.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %vp_nxv64i64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i1.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 
@@ -1338,6 +2642,12 @@ define void @trunc() {
   %v2i4_v2i2 = trunc <2 x i4> undef to <2 x i2>
   %v2i6_v2i4 = trunc <2 x i6> undef to <2 x i4>
 
+  %vp_v2i16_v2i2 = call <2 x i2> @llvm.vp.trunc.v2i16.v2i2(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i16_v2i4 = call <2 x i4> @llvm.vp.trunc.v2i16.v2i4(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i16_v2i6 = call <2 x i6> @llvm.vp.trunc.v2i16.v2i6(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i4_v2i2 = call <2 x i2> @llvm.vp.trunc.v2i4.v2i2(<2 x i4> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i6_v2i4 = call <2 x i4> @llvm.vp.trunc.v2i6.v2i4(<2 x i6> undef, <2 x i1> undef, i32 undef)
+
   %v2i16_v2i8 = trunc <2 x i16> undef to <2 x i8>
   %v2i32_v2i8 = trunc <2 x i32> undef to <2 x i8>
   %v2i64_v2i8 = trunc <2 x i64> undef to <2 x i8>
@@ -1349,6 +2659,17 @@ define void @trunc() {
   %v2i32_v2i1 = trunc <2 x i32> undef to <2 x i1>
   %v2i64_v2i1 = trunc <2 x i64> undef to <2 x i1>
 
+  %vp_v2i16_v2i8 = call <2 x i8> @llvm.vp.trunc.v2i16.v2i8(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i32_v2i8 = call <2 x i8> @llvm.vp.trunc.v2i32.v2i8(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i64_v2i8 = call <2 x i8> @llvm.vp.trunc.v2i64.v2i8(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i32_v2i16 = call <2 x i16> @llvm.vp.trunc.v2i32.v2i16(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i64_v2i16 = call <2 x i16> @llvm.vp.trunc.v2i64.v2i16(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i64_v2i32 = call <2 x i32> @llvm.vp.trunc.v2i64.v2i32(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i8_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i8.v2i1(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i16_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i16.v2i1(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i32_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i32.v2i1(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2i64_v2i1 = call <2 x i1> @llvm.vp.trunc.v2i64.v2i1(<2 x i64> undef, <2 x i1> undef, i32 undef)
+
   %v4i16_v4i8 = trunc <4 x i16> undef to <4 x i8>
   %v4i32_v4i8 = trunc <4 x i32> undef to <4 x i8>
   %v4i64_v4i8 = trunc <4 x i64> undef to <4 x i8>
@@ -1360,6 +2681,17 @@ define void @trunc() {
   %v4i32_v4i1 = trunc <4 x i32> undef to <4 x i1>
   %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1>
 
+  %vp_v4i16_v4i8 = call <4 x i8> @llvm.vp.trunc.v4i16.v4i8(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i32_v4i8 = call <4 x i8> @llvm.vp.trunc.v4i32.v4i8(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i64_v4i8 = call <4 x i8> @llvm.vp.trunc.v4i64.v4i8(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i32_v4i16 = call <4 x i16> @llvm.vp.trunc.v4i32.v4i16(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i64_v4i16 = call <4 x i16> @llvm.vp.trunc.v4i64.v4i16(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i64_v4i32 = call <4 x i32> @llvm.vp.trunc.v4i64.v4i32(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i8_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i8.v4i1(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i16_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i16.v4i1(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i32_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i32.v4i1(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4i64_v4i1 = call <4 x i1> @llvm.vp.trunc.v4i64.v4i1(<4 x i64> undef, <4 x i1> undef, i32 undef)
+
   %v8i16_v8i8 = trunc <8 x i16> undef to <8 x i8>
   %v8i32_v8i8 = trunc <8 x i32> undef to <8 x i8>
   %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8>
@@ -1371,6 +2703,17 @@ define void @trunc() {
   %v8i32_v8i1 = trunc <8 x i32> undef to <8 x i1>
   %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1>
 
+  %vp_v8i16_v8i8 = call <8 x i8> @llvm.vp.trunc.v8i16.v8i8(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i32_v8i8 = call <8 x i8> @llvm.vp.trunc.v8i32.v8i8(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i64_v8i8 = call <8 x i8> @llvm.vp.trunc.v8i64.v8i8(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i32_v8i16 = call <8 x i16> @llvm.vp.trunc.v8i32.v8i16(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i64_v8i16 = call <8 x i16> @llvm.vp.trunc.v8i64.v8i16(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i64_v8i32 = call <8 x i32> @llvm.vp.trunc.v8i64.v8i32(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i8_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i8.v8i1(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i16_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i16.v8i1(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i32_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i32.v8i1(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8i64_v8i1 = call <8 x i1> @llvm.vp.trunc.v8i64.v8i1(<8 x i64> undef, <8 x i1> undef, i32 undef)
+
   %v16i16_v16i8 = trunc <2 x i16> undef to <2 x i8>
   %v16i32_v16i8 = trunc <2 x i32> undef to <2 x i8>
   %v16i64_v16i8 = trunc <2 x i64> undef to <2 x i8>
@@ -1382,6 +2725,17 @@ define void @trunc() {
   %v16i32_v16i1 = trunc <2 x i32> undef to <2 x i1>
   %v16i64_v16i1 = trunc <2 x i64> undef to <2 x i1>
 
+  %vp_v16i16_v16i8 = call <16 x i8> @llvm.vp.trunc.v16i16.v16i8(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i32_v16i8 = call <16 x i8> @llvm.vp.trunc.v16i32.v16i8(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i64_v16i8 = call <16 x i8> @llvm.vp.trunc.v16i64.v16i8(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i32_v16i16 = call <16 x i16> @llvm.vp.trunc.v16i32.v16i16(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i64_v16i16 = call <16 x i16> @llvm.vp.trunc.v16i64.v16i16(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i64_v16i32 = call <16 x i32> @llvm.vp.trunc.v16i64.v16i32(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i8_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i8.v16i1(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i16_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i16.v16i1(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i32_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i32.v16i1(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16i64_v16i1 = call <16 x i1> @llvm.vp.trunc.v16i64.v16i1(<16 x i64> undef, <16 x i1> undef, i32 undef)
+
   %v32i16_v32i8 = trunc <16 x i16> undef to <16 x i8>
   %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8>
   %v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8>
@@ -1393,6 +2747,17 @@ define void @trunc() {
   %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1>
   %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1>
 
+  %vp_v32i16_v32i8 = call <32 x i8> @llvm.vp.trunc.v32i16.v32i8(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i32_v32i8 = call <32 x i8> @llvm.vp.trunc.v32i32.v32i8(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i64_v32i8 = call <32 x i8> @llvm.vp.trunc.v32i64.v32i8(<32 x i64> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i32_v32i16 = call <32 x i16> @llvm.vp.trunc.v32i32.v32i16(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i64_v32i16 = call <32 x i16> @llvm.vp.trunc.v32i64.v32i16(<32 x i64> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i64_v32i32 = call <32 x i32> @llvm.vp.trunc.v32i64.v32i32(<32 x i64> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i8_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i8.v32i1(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i16_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i16.v32i1(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i32_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i32.v32i1(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32i64_v32i1 = call <32 x i1> @llvm.vp.trunc.v32i64.v32i1(<32 x i64> undef, <32 x i1> undef, i32 undef)
+
   %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8>
   %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8>
   %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8>
@@ -1404,6 +2769,17 @@ define void @trunc() {
   %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1>
   %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1>
 
+  %vp_v64i16_v64i8 = call <64 x i8> @llvm.vp.trunc.v64i16.v64i8(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i32_v64i8 = call <64 x i8> @llvm.vp.trunc.v64i32.v64i8(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i64_v64i8 = call <64 x i8> @llvm.vp.trunc.v64i64.v64i8(<64 x i64> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i32_v64i16 = call <64 x i16> @llvm.vp.trunc.v64i32.v64i16(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i64_v64i16 = call <64 x i16> @llvm.vp.trunc.v64i64.v64i16(<64 x i64> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i64_v64i32 = call <64 x i32> @llvm.vp.trunc.v64i64.v64i32(<64 x i64> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i8_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i8.v64i1(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i16_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i16.v64i1(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i32_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i32.v64i1(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64i64_v64i1 = call <64 x i1> @llvm.vp.trunc.v64i64.v64i1(<64 x i64> undef, <64 x i1> undef, i32 undef)
+
   %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8>
   %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8>
   %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8>
@@ -1415,6 +2791,17 @@ define void @trunc() {
   %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1>
   %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1>
 
+  %vp_v128i16_v128i8 = call <128 x i8> @llvm.vp.trunc.v128i16.v128i8(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i32_v128i8 = call <128 x i8> @llvm.vp.trunc.v128i32.v128i8(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i64_v128i8 = call <128 x i8> @llvm.vp.trunc.v128i64.v128i8(<128 x i64> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i32_v128i16 = call <128 x i16> @llvm.vp.trunc.v128i32.v128i16(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i64_v128i16 = call <128 x i16> @llvm.vp.trunc.v128i64.v128i16(<128 x i64> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i64_v128i32 = call <128 x i32> @llvm.vp.trunc.v128i64.v128i32(<128 x i64> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i8_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i8.v128i1(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i16_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i16.v128i1(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i32_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i32.v128i1(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128i64_v128i1 = call <128 x i1> @llvm.vp.trunc.v128i64.v128i1(<128 x i64> undef, <128 x i1> undef, i32 undef)
+
   %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8>
   %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8>
   %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8>
@@ -1426,6 +2813,17 @@ define void @trunc() {
   %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1>
   %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1>
 
+  %vp_v256i16_v256i8 = call <256 x i8> @llvm.vp.trunc.v256i16.v256i8(<256 x i16> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i32_v256i8 = call <256 x i8> @llvm.vp.trunc.v256i32.v256i8(<256 x i32> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i64_v256i8 = call <256 x i8> @llvm.vp.trunc.v256i64.v256i8(<256 x i64> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i32_v256i16 = call <256 x i16> @llvm.vp.trunc.v256i32.v256i16(<256 x i32> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i64_v256i16 = call <256 x i16> @llvm.vp.trunc.v256i64.v256i16(<256 x i64> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i64_v256i32 = call <256 x i32> @llvm.vp.trunc.v256i64.v256i32(<256 x i64> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i8_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i8.v256i1(<256 x i8> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i16_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i16.v256i1(<256 x i16> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i32_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i32.v256i1(<256 x i32> undef, <256 x i1> undef, i32 undef)
+  %vp_v256i64_v256i1 = call <256 x i1> @llvm.vp.trunc.v256i64.v256i1(<256 x i64> undef, <256 x i1> undef, i32 undef)
+
   %nxv1i16_nxv1i8 = trunc <vscale x 1 x i16> undef to <vscale x 1 x i8>
   %nxv1i32_nxv1i8 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
   %nxv1i64_nxv1i8 = trunc <vscale x 1 x i64> undef to <vscale x 1 x i8>
@@ -1437,6 +2835,17 @@ define void @trunc() {
   %nxv1i32_nxv1i1 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i1>
   %nxv1i64_nxv1i1 = trunc <vscale x 1 x i64> undef to <vscale x 1 x i1>
 
+  %vp_nxv1i16_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i16.nxv1i8(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i32.nxv1i8(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i64.nxv1i8(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i32.nxv1i16(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i64.nxv1i16(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i64.nxv1i32(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i8_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i8.nxv1i1(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i16_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i16.nxv1i1(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i32.nxv1i1(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1i64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.trunc.nxv1i64.nxv1i1(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+
   %nxv2i16_nxv2i8 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i8>
   %nxv2i32_nxv2i8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
   %nxv2i64_nxv2i8 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i8>
@@ -1448,6 +2857,17 @@ define void @trunc() {
   %nxv2i32_nxv2i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
   %nxv2i64_nxv2i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
 
+  %vp_nxv2i16_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i16.nxv2i8(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i32.nxv2i8(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i64.nxv2i8(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i32.nxv2i16(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i64.nxv2i16(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i8_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i8.nxv2i1(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i16_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i32.nxv2i1(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2i64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i64.nxv2i1(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+
   %nxv4i16_nxv4i8 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
   %nxv4i32_nxv4i8 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
   %nxv4i64_nxv4i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
@@ -1459,6 +2879,17 @@ define void @trunc() {
   %nxv4i32_nxv4i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
   %nxv4i64_nxv4i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
 
+  %vp_nxv4i16_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i16.nxv4i8(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i32.nxv4i8(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i64.nxv4i8(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.trunc.nxv4i32.nxv4i16(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.trunc.nxv4i64.nxv4i16(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i64.nxv4i32(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i8_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i8.nxv4i1(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i16_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i16.nxv4i1(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i32.nxv4i1(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4i64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.trunc.nxv4i64.nxv4i1(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+
   %nxv8i16_nxv8i8 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
   %nxv8i32_nxv8i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
   %nxv8i64_nxv8i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
@@ -1470,6 +2901,17 @@ define void @trunc() {
   %nxv8i32_nxv8i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
   %nxv8i64_nxv8i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
 
+  %vp_nxv8i16_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i16.nxv8i8(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i32.nxv8i8(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i64.nxv8i8(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i32.nxv8i16(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i64.nxv8i16(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.trunc.nxv8i64.nxv8i32(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i8_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i8.nxv8i1(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i16_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i16.nxv8i1(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i32.nxv8i1(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8i64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.trunc.nxv8i64.nxv8i1(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+
   %nxv16i16_nxv16i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
   %nxv16i32_nxv16i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
   %nxv16i64_nxv16i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
@@ -1481,6 +2923,17 @@ define void @trunc() {
   %nxv16i32_nxv16i1 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i1>
   %nxv16i64_nxv16i1 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i1>
 
+  %vp_nxv16i16_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.trunc.nxv16i16.nxv16i8(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.trunc.nxv16i32.nxv16i8(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.trunc.nxv16i64.nxv16i8(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.trunc.nxv16i32.nxv16i16(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.trunc.nxv16i64.nxv16i16(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.trunc.nxv16i64.nxv16i32(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i8_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i8.nxv16i1(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i16_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i16.nxv16i1(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i32.nxv16i1(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16i64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.trunc.nxv16i64.nxv16i1(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+
   %nxv32i16_nxv32i8 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i8>
   %nxv32i32_nxv32i8 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i8>
   %nxv32i64_nxv32i8 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i8>
@@ -1492,6 +2945,17 @@ define void @trunc() {
   %nxv32i32_nxv32i1 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i1>
   %nxv32i64_nxv32i1 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i1>
 
+  %vp_nxv32i16_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.trunc.nxv32i16.nxv32i8(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.trunc.nxv32i32.nxv32i8(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.trunc.nxv32i64.nxv32i8(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.trunc.nxv32i32.nxv32i16(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.trunc.nxv32i64.nxv32i16(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.trunc.nxv32i64.nxv32i32(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i8_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i8.nxv32i1(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i16_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i16.nxv32i1(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i32.nxv32i1(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32i64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.trunc.nxv32i64.nxv32i1(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+
   %nxv64i16_nxv64i8 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i8>
   %nxv64i32_nxv64i8 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i8>
   %nxv64i64_nxv64i8 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i8>
@@ -1503,6 +2967,17 @@ define void @trunc() {
   %nxv64i32_nxv64i1 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i1>
   %nxv64i64_nxv64i1 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i1>
 
+  %vp_nxv64i16_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.trunc.nxv64i16.nxv64i8(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.trunc.nxv64i32.nxv64i8(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.trunc.nxv64i64.nxv64i8(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.trunc.nxv64i32.nxv64i16(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.trunc.nxv64i64.nxv64i16(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.trunc.nxv64i64.nxv64i32(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i8_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i8.nxv64i1(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i16_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i16.nxv64i1(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i32.nxv64i1(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64i64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.trunc.nxv64i64.nxv64i1(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+
   ret void
 }
 
@@ -1511,103 +2986,201 @@ define void @fpext() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2f32 = fpext <2 x half> undef to <2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2f64 = fpext <2 x half> undef to <2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2f64 = fpext <2 x float> undef to <2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2f32 = call <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f16_v2f64 = call <2 x double> @llvm.vp.fpext.v2f64.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2f64 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4f32 = fpext <4 x half> undef to <4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4f64 = fpext <4 x half> undef to <4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4f64 = fpext <4 x float> undef to <4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4f32 = call <4 x float> @llvm.vp.fpext.v4f32.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f16_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8f32 = fpext <8 x half> undef to <8 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f16_v8f64 = fpext <8 x half> undef to <8 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8f64 = fpext <8 x float> undef to <8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f16_v8f32 = call <8 x float> @llvm.vp.fpext.v8f32.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8f16_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f32_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16_v16f32 = fpext <16 x half> undef to <16 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f16_v16f64 = fpext <16 x half> undef to <16 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16f64 = fpext <16 x float> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f16_v16f32 = call <16 x float> @llvm.vp.fpext.v16f32.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16f16_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f32_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32f16_v32f32 = fpext <32 x half> undef to <32 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32f16_v32f64 = fpext <32 x half> undef to <32 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32f64 = fpext <32 x float> undef to <32 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32f16_v32f32 = call <32 x float> @llvm.vp.fpext.v32f32.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32f16_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32f32_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64f16_v64f32 = fpext <64 x half> undef to <64 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64f16_v64f64 = fpext <64 x half> undef to <64 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64f32_v64f64 = fpext <64 x float> undef to <64 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64f16_v64f32 = call <64 x float> @llvm.vp.fpext.v64f32.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64f16_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64f32_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v128f16_v128f32 = fpext <128 x half> undef to <128 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %v128f16_v128f64 = fpext <128 x half> undef to <128 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128f32_v128f64 = fpext <128 x float> undef to <128 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128f16_v128f32 = call <128 x float> @llvm.vp.fpext.v128f32.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_v128f16_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128f32_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1f32 = fpext <vscale x 1 x half> undef to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1f64 = fpext <vscale x 1 x half> undef to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f64 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4f16_nxv4f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f32_nxv4f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4f16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f16_nxv8f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8f16_nxv8f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f32_nxv8f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8f16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16f16_nxv16f32 = fpext <vscale x 16 x half> undef to <vscale x 16 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16f16_nxv16f64 = fpext <vscale x 16 x half> undef to <vscale x 16 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16f32_nxv16f64 = fpext <vscale x 16 x float> undef to <vscale x 16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16f16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16f16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16f32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32f16_nxv32f32 = fpext <vscale x 32 x half> undef to <vscale x 32 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32f16_nxv32f64 = fpext <vscale x 32 x half> undef to <vscale x 32 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32f32_nxv32f64 = fpext <vscale x 32 x float> undef to <vscale x 32 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32f16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fpext.nxv32f32.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32f16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32f32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv64f16_nxv64f32 = fpext <vscale x 64 x half> undef to <vscale x 64 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %nxv64f16_nxv64f64 = fpext <vscale x 64 x half> undef to <vscale x 64 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64f32_nxv64f64 = fpext <vscale x 64 x float> undef to <vscale x 64 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64f16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fpext.nxv64f32.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_nxv64f16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64f32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f16_v2f32 = fpext <2 x half> undef to <2 x float>
   %v2f16_v2f64 = fpext <2 x half> undef to <2 x double>
   %v2f32_v2f64 = fpext <2 x float> undef to <2 x double>
 
+  %vp_v2f16_v2f32 = call <2 x float> @llvm.vp.fpext.v2half.v2float(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f16_v2f64 = call <2 x double> @llvm.vp.fpext.v2half.v2double(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2f64 = call <2 x double> @llvm.vp.fpext.v2float.v2double(<2 x float> undef, <2 x i1> undef, i32 undef)
+
   %v4f16_v4f32 = fpext <4 x half> undef to <4 x float>
   %v4f16_v4f64 = fpext <4 x half> undef to <4 x double>
   %v4f32_v4f64 = fpext <4 x float> undef to <4 x double>
 
+  %vp_v4f16_v4f32 = call <4 x float> @llvm.vp.fpext.v4half.v4float(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f16_v4f64 = call <4 x double> @llvm.vp.fpext.v4half.v4double(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4f64 = call <4 x double> @llvm.vp.fpext.v4float.v4double(<4 x float> undef, <4 x i1> undef, i32 undef)
+
   %v8f16_v8f32 = fpext <8 x half> undef to <8 x float>
   %v8f16_v8f64 = fpext <8 x half> undef to <8 x double>
   %v8f32_v8f64 = fpext <8 x float> undef to <8 x double>
 
+  %vp_v8f16_v8f32 = call <8 x float> @llvm.vp.fpext.v8half.v8float(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f16_v8f64 = call <8 x double> @llvm.vp.fpext.v8half.v8double(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8f64 = call <8 x double> @llvm.vp.fpext.v8float.v8double(<8 x float> undef, <8 x i1> undef, i32 undef)
+
   %v16f16_v16f32 = fpext <16 x half> undef to <16 x float>
   %v16f16_v16f64 = fpext <16 x half> undef to <16 x double>
   %v16f32_v16f64 = fpext <16 x float> undef to <16 x double>
 
+  %vp_v16f16_v16f32 = call <16 x float> @llvm.vp.fpext.v16half.v16float(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f16_v16f64 = call <16 x double> @llvm.vp.fpext.v16half.v16double(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16f64 = call <16 x double> @llvm.vp.fpext.v16float.v16double(<16 x float> undef, <16 x i1> undef, i32 undef)
+
   %v32f16_v32f32 = fpext <32 x half> undef to <32 x float>
   %v32f16_v32f64 = fpext <32 x half> undef to <32 x double>
   %v32f32_v32f64 = fpext <32 x float> undef to <32 x double>
 
+  %vp_v32f16_v32f32 = call <32 x float> @llvm.vp.fpext.v32half.v32float(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f16_v32f64 = call <32 x double> @llvm.vp.fpext.v32half.v32double(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32f64 = call <32 x double> @llvm.vp.fpext.v32float.v32double(<32 x float> undef, <32 x i1> undef, i32 undef)
+
   %v64f16_v64f32 = fpext <64 x half> undef to <64 x float>
   %v64f16_v64f64 = fpext <64 x half> undef to <64 x double>
   %v64f32_v64f64 = fpext <64 x float> undef to <64 x double>
 
+  %vp_v64f16_v64f32 = call <64 x float> @llvm.vp.fpext.v64half.v64float(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f16_v64f64 = call <64 x double> @llvm.vp.fpext.v64half.v64double(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64f64 = call <64 x double> @llvm.vp.fpext.v64float.v64double(<64 x float> undef, <64 x i1> undef, i32 undef)
+
   %v128f16_v128f32 = fpext <128 x half> undef to <128 x float>
   %v128f16_v128f64 = fpext <128 x half> undef to <128 x double>
   %v128f32_v128f64 = fpext <128 x float> undef to <128 x double>
 
+  %vp_v128f16_v128f32 = call <128 x float> @llvm.vp.fpext.v128half.v128float(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f16_v128f64 = call <128 x double> @llvm.vp.fpext.v128half.v128double(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128f64 = call <128 x double> @llvm.vp.fpext.v128float.v128double(<128 x float> undef, <128 x i1> undef, i32 undef)
+
   %nxv1f16_nxv1f32 = fpext <vscale x 1 x half> undef to <vscale x 1 x float>
   %nxv1f16_nxv1f64 = fpext <vscale x 1 x half> undef to <vscale x 1 x double>
   %nxv1f32_nxv1f64 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
 
+  %vp_nxv1f16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1half.nxv1float(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1half.nxv1double(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1float.nxv1double(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+
   %nxv2f16_nxv2f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
   %nxv2f16_nxv2f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
   %nxv2f32_nxv2f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
 
+  %vp_nxv2f16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2half.nxv2float(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2half.nxv2double(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2float.nxv2double(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+
   %nxv4f16_nxv4f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
   %nxv4f16_nxv4f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
   %nxv4f32_nxv4f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
 
+  %vp_nxv4f16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4half.nxv4float(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4half.nxv4double(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4float.nxv4double(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+
   %nxv8f16_nxv8f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
   %nxv8f16_nxv8f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
   %nxv8f32_nxv8f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
 
+  %vp_nxv8f16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8half.nxv8float(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8half.nxv8double(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8float.nxv8double(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+
   %nxv16f16_nxv16f32 = fpext <vscale x 16 x half> undef to <vscale x 16 x float>
   %nxv16f16_nxv16f64 = fpext <vscale x 16 x half> undef to <vscale x 16 x double>
   %nxv16f32_nxv16f64 = fpext <vscale x 16 x float> undef to <vscale x 16 x double>
 
+  %vp_nxv16f16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fpext.nxv16half.nxv16float(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16half.nxv16double(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16float.nxv16double(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+
   %nxv32f16_nxv32f32 = fpext <vscale x 32 x half> undef to <vscale x 32 x float>
   %nxv32f16_nxv32f64 = fpext <vscale x 32 x half> undef to <vscale x 32 x double>
   %nxv32f32_nxv32f64 = fpext <vscale x 32 x float> undef to <vscale x 32 x double>
 
+  %vp_nxv32f16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fpext.nxv32half.nxv32float(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32half.nxv32double(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32float.nxv32double(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+
   %nxv64f16_nxv64f32 = fpext <vscale x 64 x half> undef to <vscale x 64 x float>
   %nxv64f16_nxv64f64 = fpext <vscale x 64 x half> undef to <vscale x 64 x double>
   %nxv64f32_nxv64f64 = fpext <vscale x 64 x float> undef to <vscale x 64 x double>
 
+  %vp_nxv64f16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fpext.nxv64half.nxv64float(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64half.nxv64double(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64float.nxv64double(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+
   ret void
 }
 
@@ -1616,103 +3189,201 @@ define void @fptrunc() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2f16 = fptrunc <2 x float> undef to <2 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2f16 = fptrunc <2 x double> undef to <2 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2f32 = fptrunc <2 x double> undef to <2 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2f16 = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2f16 = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2f32 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4f16 = fptrunc <4 x float> undef to <4 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4f16 = fptrunc <4 x double> undef to <4 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4f32 = fptrunc <4 x double> undef to <4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4f16 = call <4 x half> @llvm.vp.fptrunc.v4f16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4f16 = call <4 x half> @llvm.vp.fptrunc.v4f16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4f32 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8f16 = fptrunc <8 x float> undef to <8 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8f16 = fptrunc <8 x double> undef to <8 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8f32 = fptrunc <8 x double> undef to <8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8f16 = call <8 x half> @llvm.vp.fptrunc.v8f16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8f16 = call <8 x half> @llvm.vp.fptrunc.v8f16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8f32 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16f16 = fptrunc <16 x float> undef to <16 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16f16 = fptrunc <16 x double> undef to <16 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f64_v16f32 = fptrunc <16 x double> undef to <16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16f16 = call <16 x half> @llvm.vp.fptrunc.v16f16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f64_v16f16 = call <16 x half> @llvm.vp.fptrunc.v16f16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f64_v16f32 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32f32_v32f16 = fptrunc <32 x float> undef to <32 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32f16 = fptrunc <32 x double> undef to <32 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32f64_v32f32 = fptrunc <32 x double> undef to <32 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32f32_v32f16 = call <32 x half> @llvm.vp.fptrunc.v32f16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v32f64_v32f16 = call <32 x half> @llvm.vp.fptrunc.v32f16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32f64_v32f32 = call <32 x float> @llvm.vp.fptrunc.v32f32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64f32_v64f16 = fptrunc <64 x float> undef to <64 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64f64_v64f16 = fptrunc <64 x double> undef to <64 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64f64_v64f32 = fptrunc <64 x double> undef to <64 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v64f32_v64f16 = call <64 x half> @llvm.vp.fptrunc.v64f16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v64f64_v64f16 = call <64 x half> @llvm.vp.fptrunc.v64f16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64f64_v64f32 = call <64 x float> @llvm.vp.fptrunc.v64f32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128f32_v128f16 = fptrunc <128 x float> undef to <128 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128f64_v128f16 = fptrunc <128 x double> undef to <128 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128f64_v128f32 = fptrunc <128 x double> undef to <128 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v128f32_v128f16 = call <128 x half> @llvm.vp.fptrunc.v128f16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v128f64_v128f16 = call <128 x half> @llvm.vp.fptrunc.v128f16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128f64_v128f32 = call <128 x float> @llvm.vp.fptrunc.v128f32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f16 = fptrunc <vscale x 1 x float> undef to <vscale x 1 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1f16 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1f32 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.fptrunc.nxv1f16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.fptrunc.nxv1f16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv1f16 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv1f16 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv1f32 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4f16 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4f16 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4f32 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.fptrunc.nxv4f16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.fptrunc.nxv4f16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8f16 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f64_nxv8f16 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f64_nxv8f32 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8f16 = call <vscale x 8 x half> @llvm.vp.fptrunc.nxv8f16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f64_nxv8f16 = call <vscale x 8 x half> @llvm.vp.fptrunc.nxv8f16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16f32_nxv16f16 = fptrunc <vscale x 16 x float> undef to <vscale x 16 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16f64_nxv16f16 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16f64_nxv16f32 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16f32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.fptrunc.nxv16f16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv16f64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.fptrunc.nxv16f16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16f64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32f32_nxv32f16 = fptrunc <vscale x 32 x float> undef to <vscale x 32 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32f64_nxv32f16 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32f64_nxv32f32 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv32f32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.fptrunc.nxv32f16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv32f64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.fptrunc.nxv32f16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32f64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fptrunc.nxv32f32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64f32_nxv64f16 = fptrunc <vscale x 64 x float> undef to <vscale x 64 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %nxv64f64_nxv64f16 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64f64_nxv64f32 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv64f32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.fptrunc.nxv64f16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_nxv64f64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.fptrunc.nxv64f16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64f64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fptrunc.nxv64f32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f32_v2f16 = fptrunc <2 x float> undef to <2 x half>
   %v2f64_v2f16 = fptrunc <2 x double> undef to <2 x half>
   %v2f64_v2f32 = fptrunc <2 x double> undef to <2 x float>
 
+  %vp_v2f32_v2f16 = call <2 x half> @llvm.vp.fptrunc.v2float.v2half(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2f16 = call <2 x half> @llvm.vp.fptrunc.v2double.v2half(<2 x double> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2f32 = call <2 x float> @llvm.vp.fptrunc.v2double.v2float(<2 x double> undef, <2 x i1> undef, i32 undef)
+
   %v4f32_v4f16 = fptrunc <4 x float> undef to <4 x half>
   %v4f64_v4f16 = fptrunc <4 x double> undef to <4 x half>
   %v4f64_v4f32 = fptrunc <4 x double> undef to <4 x float>
 
+  %vp_v4f32_v4f16 = call <4 x half> @llvm.vp.fptrunc.v4float.v4half(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4f16 = call <4 x half> @llvm.vp.fptrunc.v4double.v4half(<4 x double> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4f32 = call <4 x float> @llvm.vp.fptrunc.v4double.v4float(<4 x double> undef, <4 x i1> undef, i32 undef)
+
   %v8f32_v8f16 = fptrunc <8 x float> undef to <8 x half>
   %v8f64_v8f16 = fptrunc <8 x double> undef to <8 x half>
   %v8f64_v8f32 = fptrunc <8 x double> undef to <8 x float>
 
+  %vp_v8f32_v8f16 = call <8 x half> @llvm.vp.fptrunc.v8float.v8half(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8f16 = call <8 x half> @llvm.vp.fptrunc.v8double.v8half(<8 x double> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8f32 = call <8 x float> @llvm.vp.fptrunc.v8double.v8float(<8 x double> undef, <8 x i1> undef, i32 undef)
+
   %v16f32_v16f16 = fptrunc <16 x float> undef to <16 x half>
   %v16f64_v16f16 = fptrunc <16 x double> undef to <16 x half>
   %v16f64_v16f32 = fptrunc <16 x double> undef to <16 x float>
 
+  %vp_v16f32_v16f16 = call <16 x half> @llvm.vp.fptrunc.v16float.v16half(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16f16 = call <16 x half> @llvm.vp.fptrunc.v16double.v16half(<16 x double> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16f32 = call <16 x float> @llvm.vp.fptrunc.v16double.v16float(<16 x double> undef, <16 x i1> undef, i32 undef)
+
   %v32f32_v32f16 = fptrunc <32 x float> undef to <32 x half>
   %v32f64_v32f16 = fptrunc <32 x double> undef to <32 x half>
   %v32f64_v32f32 = fptrunc <32 x double> undef to <32 x float>
 
+  %vp_v32f32_v32f16 = call <32 x half> @llvm.vp.fptrunc.v32float.v32half(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32f16 = call <32 x half> @llvm.vp.fptrunc.v32double.v32half(<32 x double> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32f32 = call <32 x float> @llvm.vp.fptrunc.v32double.v32float(<32 x double> undef, <32 x i1> undef, i32 undef)
+
   %v64f32_v64f16 = fptrunc <64 x float> undef to <64 x half>
   %v64f64_v64f16 = fptrunc <64 x double> undef to <64 x half>
   %v64f64_v64f32 = fptrunc <64 x double> undef to <64 x float>
 
+  %vp_v64f32_v64f16 = call <64 x half> @llvm.vp.fptrunc.v64float.v64half(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64f16 = call <64 x half> @llvm.vp.fptrunc.v64double.v64half(<64 x double> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64f32 = call <64 x float> @llvm.vp.fptrunc.v64double.v64float(<64 x double> undef, <64 x i1> undef, i32 undef)
+
   %v128f32_v128f16 = fptrunc <128 x float> undef to <128 x half>
   %v128f64_v128f16 = fptrunc <128 x double> undef to <128 x half>
   %v128f64_v128f32 = fptrunc <128 x double> undef to <128 x float>
 
+  %vp_v128f32_v128f16 = call <128 x half> @llvm.vp.fptrunc.v128float.v128half(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128f16 = call <128 x half> @llvm.vp.fptrunc.v128double.v128half(<128 x double> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128f32 = call <128 x float> @llvm.vp.fptrunc.v128double.v128float(<128 x double> undef, <128 x i1> undef, i32 undef)
+
   %nxv1f32_nxv1f16 = fptrunc <vscale x 1 x float> undef to <vscale x 1 x half>
   %nxv1f64_nxv1f16 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x half>
   %nxv1f64_nxv1f32 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
 
+  %vp_nxv1f32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.fptrunc.nxv1float.nxv1half(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.fptrunc.nxv1double.nxv1half(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1double.nxv1float(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+
   %nxv2f32_nxv1f16 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
   %nxv2f64_nxv1f16 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
   %nxv2f64_nxv1f32 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
 
+  %vp_nxv2f32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2float.nxv2half(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2double.nxv2half(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2double.nxv2float(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+
   %nxv4f32_nxv4f16 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
   %nxv4f64_nxv4f16 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
   %nxv4f64_nxv4f32 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
 
+  %vp_nxv4f32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.fptrunc.nxv4float.nxv4half(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.fptrunc.nxv4double.nxv4half(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4double.nxv4float(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+
   %nxv8f32_nxv8f16 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
   %nxv8f64_nxv8f16 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
   %nxv8f64_nxv8f32 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
 
+  %vp_nxv8f32_nxv8f16 = call <vscale x 8 x half> @llvm.vp.fptrunc.nxv8float.nxv8half(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8f16 = call <vscale x 8 x half> @llvm.vp.fptrunc.nxv8double.nxv8half(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8double.nxv8float(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+
   %nxv16f32_nxv16f16 = fptrunc <vscale x 16 x float> undef to <vscale x 16 x half>
   %nxv16f64_nxv16f16 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x half>
   %nxv16f64_nxv16f32 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x float>
 
+  %vp_nxv16f32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.fptrunc.nxv16float.nxv16half(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.fptrunc.nxv16double.nxv16half(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fptrunc.nxv16double.nxv16float(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+
   %nxv32f32_nxv32f16 = fptrunc <vscale x 32 x float> undef to <vscale x 32 x half>
   %nxv32f64_nxv32f16 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x half>
   %nxv32f64_nxv32f32 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x float>
 
+  %vp_nxv32f32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.fptrunc.nxv32float.nxv32half(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.fptrunc.nxv32double.nxv32half(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fptrunc.nxv32double.nxv32float(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+
   %nxv64f32_nxv64f16 = fptrunc <vscale x 64 x float> undef to <vscale x 64 x half>
   %nxv64f64_nxv64f16 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x half>
   %nxv64f64_nxv64f32 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x float>
 
+  %vp_nxv64f32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.fptrunc.nxv64float.nxv64half(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.fptrunc.nxv64double.nxv64half(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fptrunc.nxv64double.nxv64float(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+
   ret void
 }
 
@@ -1733,6 +3404,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptosi <2 x half> undef to <2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptosi <2 x float> undef to <2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptosi <2 x double> undef to <2 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f16_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f16_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptosi <4 x float> undef to <4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptosi <4 x double> undef to <4 x i8>
@@ -1748,6 +3434,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptosi <4 x float> undef to <4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptosi <4 x double> undef to <4 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f16_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f16_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptosi <8 x float> undef to <8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8>
@@ -1763,6 +3464,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptosi <8 x float> undef to <8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f16_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f16_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8>
@@ -1778,6 +3494,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f16_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f16_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8>
@@ -1793,6 +3524,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32f16_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f16_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8>
@@ -1808,6 +3554,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptosi <64 x half> undef to <64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v64f32_v64i1 = fptosi <64 x float> undef to <64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i1 = fptosi <64 x double> undef to <64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64f16_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64f16_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f16_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64f16_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f16_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128f32_v128i8 = fptosi <128 x float> undef to <128 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i8 = fptosi <128 x double> undef to <128 x i8>
@@ -1823,6 +3584,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptosi <128 x half> undef to <128 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v128f32_v128i1 = fptosi <128 x float> undef to <128 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i1 = fptosi <128 x double> undef to <128 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v128f16_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v128f16_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128f16_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128f16_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v128f16_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
@@ -1838,6 +3614,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f16_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
@@ -1853,6 +3644,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f16_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
@@ -1868,6 +3674,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i1 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f16_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
@@ -1883,6 +3704,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i1 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i1 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f16_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f32_nxv16i8 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i8 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i8>
@@ -1898,6 +3734,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i1 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i1 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16f16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f16_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32f32_nxv32i8 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i8 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i8>
@@ -1913,6 +3764,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv32f32_nxv32i1 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i1 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32f16_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32f16_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32f16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f16_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64f32_nxv64i8 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i8 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i8>
@@ -1928,6 +3794,21 @@ define void @fptosi() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv64f32_nxv64i1 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i1 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv64f16_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv64f16_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64f16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_nxv64f16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv64f16_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64-LABEL: 'fptosi'
@@ -1946,6 +3827,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptosi <2 x half> undef to <2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptosi <2 x float> undef to <2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptosi <2 x double> undef to <2 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2i8.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2i16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f16_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2i64.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f16_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2i1.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptosi <4 x float> undef to <4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptosi <4 x double> undef to <4 x i8>
@@ -1961,6 +3857,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptosi <4 x float> undef to <4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptosi <4 x double> undef to <4 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f16_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f16_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptosi <8 x float> undef to <8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8>
@@ -1976,6 +3887,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptosi <8 x float> undef to <8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8i8.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f16_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8i64.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f16_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8i1.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8>
@@ -1991,6 +3917,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16i8.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16i16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f16_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16i64.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f16_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16i1.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8>
@@ -2006,6 +3947,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32i8.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32i16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32i32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32f16_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f16_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32i1.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8>
@@ -2021,6 +3977,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptosi <64 x half> undef to <64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v64f32_v64i1 = fptosi <64 x float> undef to <64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i1 = fptosi <64 x double> undef to <64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64f16_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64i8.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64f16_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64i16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f16_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64i32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64f16_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64i64.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f16_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64i1.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128f32_v128i8 = fptosi <128 x float> undef to <128 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i8 = fptosi <128 x double> undef to <128 x i8>
@@ -2036,6 +4007,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptosi <128 x half> undef to <128 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v128f32_v128i1 = fptosi <128 x float> undef to <128 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i1 = fptosi <128 x double> undef to <128 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v128f16_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128i8.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v128f16_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128i16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128f16_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128i32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128f16_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128i64.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v128f16_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128i1.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
@@ -2051,6 +4037,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1i8.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1i16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1i64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f16_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1i1.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
@@ -2066,6 +4067,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2i8.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2i64.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f16_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
@@ -2081,6 +4097,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i1 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4i8.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4i16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4i64.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f16_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4i1.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
@@ -2096,6 +4127,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i1 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i1 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8i8.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8i16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8i64.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f16_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8i1.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f32_nxv16i8 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i8 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i8>
@@ -2111,6 +4157,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i1 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i1 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16i8.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16i16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16i32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16f16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16i64.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f16_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16i1.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32f32_nxv32i8 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i8 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i8>
@@ -2126,6 +4187,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv32f32_nxv32i1 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i1 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32f16_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32i8.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32f16_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32i16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32i32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32f16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32i64.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f16_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32i1.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64f32_nxv64i8 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i8 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i8>
@@ -2141,6 +4217,21 @@ define void @fptosi() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv64f32_nxv64i1 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i1 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv64f16_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64i8.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv64f16_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64i16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64f16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64i32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64f16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64i64.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv64f16_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64i1.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f16_v2i8 = fptosi <2 x half> undef to <2 x i8>
@@ -2159,6 +4250,22 @@ define void @fptosi() {
   %v2f32_v2i1 = fptosi <2 x float> undef to <2 x i1>
   %v2f64_v2i1 = fptosi <2 x double> undef to <2 x i1>
 
+  %vp_v2f16_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2half.v2i8(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2float.v2i8(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptosi.v2double.v2i8(<2 x double> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f16_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2half.v2i16(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2float.v2i16(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptosi.v2double.v2i16(<2 x double> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f16_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2half.v2i32(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2float.v2i32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptosi.v2double.v2i32(<2 x double> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f16_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2half.v2i64(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2float.v2i64(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptosi.v2double.v2i64(<2 x double> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f16_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2half.v2i1(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2float.v2i1(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptosi.v2double.v2i1(<2 x double> undef, <2 x i1> undef, i32 undef)
+
   %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8>
   %v4f32_v4i8 = fptosi <4 x float> undef to <4 x i8>
   %v4f64_v4i8 = fptosi <4 x double> undef to <4 x i8>
@@ -2175,6 +4282,22 @@ define void @fptosi() {
   %v4f32_v4i1 = fptosi <4 x float> undef to <4 x i1>
   %v4f64_v4i1 = fptosi <4 x double> undef to <4 x i1>
 
+  %vp_v4f16_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4half.v4i8(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4float.v4i8(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptosi.v4double.v4i8(<4 x double> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f16_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4half.v4i16(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4float.v4i16(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptosi.v4double.v4i16(<4 x double> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f16_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4half.v4i32(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4float.v4i32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptosi.v4double.v4i32(<4 x double> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f16_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4half.v4i64(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4float.v4i64(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptosi.v4double.v4i64(<4 x double> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f16_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4half.v4i1(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4float.v4i1(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptosi.v4double.v4i1(<4 x double> undef, <4 x i1> undef, i32 undef)
+
   %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
   %v8f32_v8i8 = fptosi <8 x float> undef to <8 x i8>
   %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8>
@@ -2191,6 +4314,22 @@ define void @fptosi() {
   %v8f32_v8i1 = fptosi <8 x float> undef to <8 x i1>
   %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1>
 
+  %vp_v8f16_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8half.v8i8(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8float.v8i8(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptosi.v8double.v8i8(<8 x double> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f16_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8half.v8i16(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8float.v8i16(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptosi.v8double.v8i16(<8 x double> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f16_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8half.v8i32(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8float.v8i32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptosi.v8double.v8i32(<8 x double> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f16_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8half.v8i64(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8float.v8i64(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptosi.v8double.v8i64(<8 x double> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f16_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8half.v8i1(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8float.v8i1(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptosi.v8double.v8i1(<8 x double> undef, <8 x i1> undef, i32 undef)
+
   %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
   %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8>
   %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8>
@@ -2207,6 +4346,22 @@ define void @fptosi() {
   %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1>
   %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1>
 
+  %vp_v16f16_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16half.v16i8(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16float.v16i8(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptosi.v16double.v16i8(<16 x double> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f16_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16half.v16i16(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16float.v16i16(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptosi.v16double.v16i16(<16 x double> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f16_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16half.v16i32(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16float.v16i32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptosi.v16double.v16i32(<16 x double> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f16_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16half.v16i64(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16float.v16i64(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptosi.v16double.v16i64(<16 x double> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f16_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16half.v16i1(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16float.v16i1(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptosi.v16double.v16i1(<16 x double> undef, <16 x i1> undef, i32 undef)
+
   %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
   %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8>
   %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8>
@@ -2223,6 +4378,22 @@ define void @fptosi() {
   %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1>
   %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1>
 
+  %vp_v32f16_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32half.v32i8(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32float.v32i8(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptosi.v32double.v32i8(<32 x double> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f16_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32half.v32i16(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32float.v32i16(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptosi.v32double.v32i16(<32 x double> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f16_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32half.v32i32(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32float.v32i32(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptosi.v32double.v32i32(<32 x double> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f16_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32half.v32i64(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32float.v32i64(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptosi.v32double.v32i64(<32 x double> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f16_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32half.v32i1(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32float.v32i1(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptosi.v32double.v32i1(<32 x double> undef, <32 x i1> undef, i32 undef)
+
   %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
   %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8>
   %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8>
@@ -2239,6 +4410,22 @@ define void @fptosi() {
   %v64f32_v64i1 = fptosi <64 x float> undef to <64 x i1>
   %v64f64_v64i1 = fptosi <64 x double> undef to <64 x i1>
 
+  %vp_v64f16_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64half.v64i8(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64float.v64i8(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptosi.v64double.v64i8(<64 x double> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f16_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64half.v64i16(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64float.v64i16(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptosi.v64double.v64i16(<64 x double> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f16_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64half.v64i32(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64float.v64i32(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptosi.v64double.v64i32(<64 x double> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f16_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64half.v64i64(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64float.v64i64(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptosi.v64double.v64i64(<64 x double> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f16_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64half.v64i1(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64float.v64i1(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptosi.v64double.v64i1(<64 x double> undef, <64 x i1> undef, i32 undef)
+
   %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8>
   %v128f32_v128i8 = fptosi <128 x float> undef to <128 x i8>
   %v128f64_v128i8 = fptosi <128 x double> undef to <128 x i8>
@@ -2255,6 +4442,22 @@ define void @fptosi() {
   %v128f32_v128i1 = fptosi <128 x float> undef to <128 x i1>
   %v128f64_v128i1 = fptosi <128 x double> undef to <128 x i1>
 
+  %vp_v128f16_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128half.v128i8(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128float.v128i8(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptosi.v128double.v128i8(<128 x double> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f16_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128half.v128i16(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128float.v128i16(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptosi.v128double.v128i16(<128 x double> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f16_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128half.v128i32(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128float.v128i32(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptosi.v128double.v128i32(<128 x double> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f16_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128half.v128i64(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128float.v128i64(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptosi.v128double.v128i64(<128 x double> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f16_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128half.v128i1(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128float.v128i1(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptosi.v128double.v128i1(<128 x double> undef, <128 x i1> undef, i32 undef)
+
   %nxv1f16_nxv1i8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
   %nxv1f32_nxv1i8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
   %nxv1f64_nxv1i8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
@@ -2271,6 +4474,22 @@ define void @fptosi() {
   %nxv1f32_nxv1i1 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i1>
   %nxv1f64_nxv1i1 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i1>
 
+  %vp_nxv1f16_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1half.nxv1i8(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1float.nxv1i8(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptosi.nxv1double.nxv1i8(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f16_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1half.nxv1i16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1float.nxv1i16(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptosi.nxv1double.nxv1i16(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1half.nxv1i32(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1float.nxv1i32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1double.nxv1i32(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1half.nxv1i64(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1float.nxv1i64(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptosi.nxv1double.nxv1i64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f16_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1half.nxv1i1(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1float.nxv1i1(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptosi.nxv1double.nxv1i1(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+
   %nxv2f16_nxv2i8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
   %nxv2f32_nxv2i8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
   %nxv2f64_nxv2i8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
@@ -2287,6 +4506,22 @@ define void @fptosi() {
   %nxv2f32_nxv2i1 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i1>
   %nxv2f64_nxv2i1 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i1>
 
+  %vp_nxv2f16_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2half.nxv2i8(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2float.nxv2i8(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptosi.nxv2double.nxv2i8(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f16_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2half.nxv2i16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2float.nxv2i16(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2double.nxv2i16(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2half.nxv2i32(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2float.nxv2i32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2double.nxv2i32(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2half.nxv2i64(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2float.nxv2i64(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptosi.nxv2double.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f16_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2half.nxv2i1(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2float.nxv2i1(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2double.nxv2i1(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+
   %nxv4f16_nxv4i8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
   %nxv4f32_nxv4i8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
   %nxv4f64_nxv4i8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
@@ -2303,6 +4538,22 @@ define void @fptosi() {
   %nxv4f32_nxv4i1 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i1>
   %nxv4f64_nxv4i1 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i1>
 
+  %vp_nxv4f16_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4half.nxv4i8(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4float.nxv4i8(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptosi.nxv4double.nxv4i8(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f16_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4half.nxv4i16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4float.nxv4i16(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptosi.nxv4double.nxv4i16(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4half.nxv4i32(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4float.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4double.nxv4i32(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4half.nxv4i64(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4float.nxv4i64(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptosi.nxv4double.nxv4i64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f16_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4half.nxv4i1(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4float.nxv4i1(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptosi.nxv4double.nxv4i1(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+
   %nxv8f16_nxv8i8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
   %nxv8f32_nxv8i8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
   %nxv8f64_nxv8i8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
@@ -2319,6 +4570,22 @@ define void @fptosi() {
   %nxv8f32_nxv8i1 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i1>
   %nxv8f64_nxv8i1 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i1>
 
+  %vp_nxv8f16_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8half.nxv8i8(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8float.nxv8i8(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptosi.nxv8double.nxv8i8(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f16_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8half.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8float.nxv8i16(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptosi.nxv8double.nxv8i16(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8half.nxv8i32(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8float.nxv8i32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8double.nxv8i32(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8half.nxv8i64(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8float.nxv8i64(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptosi.nxv8double.nxv8i64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f16_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8half.nxv8i1(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8float.nxv8i1(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptosi.nxv8double.nxv8i1(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+
   %nxv16f16_nxv16i8 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i8>
   %nxv16f32_nxv16i8 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i8>
   %nxv16f64_nxv16i8 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i8>
@@ -2335,6 +4602,22 @@ define void @fptosi() {
   %nxv16f32_nxv16i1 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i1>
   %nxv16f64_nxv16i1 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i1>
 
+  %vp_nxv16f16_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16half.nxv16i8(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16float.nxv16i8(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptosi.nxv16double.nxv16i8(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f16_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16half.nxv16i16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16float.nxv16i16(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptosi.nxv16double.nxv16i16(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16half.nxv16i32(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16float.nxv16i32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptosi.nxv16double.nxv16i32(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16half.nxv16i64(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16float.nxv16i64(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptosi.nxv16double.nxv16i64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f16_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16half.nxv16i1(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16float.nxv16i1(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptosi.nxv16double.nxv16i1(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+
   %nxv32f16_nxv32i8 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i8>
   %nxv32f32_nxv32i8 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i8>
   %nxv32f64_nxv32i8 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i8>
@@ -2351,6 +4634,22 @@ define void @fptosi() {
   %nxv32f32_nxv32i1 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i1>
   %nxv32f64_nxv32i1 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i1>
 
+  %vp_nxv32f16_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32half.nxv32i8(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32float.nxv32i8(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptosi.nxv32double.nxv32i8(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f16_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32half.nxv32i16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32float.nxv32i16(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptosi.nxv32double.nxv32i16(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32half.nxv32i32(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32float.nxv32i32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptosi.nxv32double.nxv32i32(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32half.nxv32i64(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32float.nxv32i64(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptosi.nxv32double.nxv32i64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f16_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32half.nxv32i1(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32float.nxv32i1(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptosi.nxv32double.nxv32i1(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+
   %nxv64f16_nxv64i8 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i8>
   %nxv64f32_nxv64i8 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i8>
   %nxv64f64_nxv64i8 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i8>
@@ -2367,6 +4666,22 @@ define void @fptosi() {
   %nxv64f32_nxv64i1 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i1>
   %nxv64f64_nxv64i1 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i1>
 
+  %vp_nxv64f16_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64half.nxv64i8(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64float.nxv64i8(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptosi.nxv64double.nxv64i8(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f16_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64half.nxv64i16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64float.nxv64i16(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptosi.nxv64double.nxv64i16(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64half.nxv64i32(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64float.nxv64i32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptosi.nxv64double.nxv64i32(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64half.nxv64i64(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64float.nxv64i64(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptosi.nxv64double.nxv64i64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f16_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64half.nxv64i1(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64float.nxv64i1(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptosi.nxv64double.nxv64i1(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+
   ret void
 }
 
@@ -2387,6 +4702,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptoui <2 x half> undef to <2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptoui <2 x float> undef to <2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptoui <2 x double> undef to <2 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f16_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f16_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptoui <4 x float> undef to <4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptoui <4 x double> undef to <4 x i8>
@@ -2402,6 +4732,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptoui <4 x half> undef to <4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptoui <4 x float> undef to <4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptoui <4 x double> undef to <4 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f16_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f16_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptoui <8 x float> undef to <8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i8 = fptoui <8 x double> undef to <8 x i8>
@@ -2417,6 +4762,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptoui <8 x half> undef to <8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptoui <8 x float> undef to <8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i1 = fptoui <8 x double> undef to <8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f16_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f16_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i8 = fptoui <16 x float> undef to <16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i8 = fptoui <16 x double> undef to <16 x i8>
@@ -2432,6 +4792,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptoui <16 x half> undef to <16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i1 = fptoui <16 x float> undef to <16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i1 = fptoui <16 x double> undef to <16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f16_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f16_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32f32_v32i8 = fptoui <32 x float> undef to <32 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i8 = fptoui <32 x double> undef to <32 x i8>
@@ -2447,6 +4822,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptoui <32 x half> undef to <32 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i1 = fptoui <32 x float> undef to <32 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i1 = fptoui <32 x double> undef to <32 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32f16_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f16_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64f32_v64i8 = fptoui <64 x float> undef to <64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i8 = fptoui <64 x double> undef to <64 x i8>
@@ -2462,6 +4852,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptoui <64 x half> undef to <64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v64f32_v64i1 = fptoui <64 x float> undef to <64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i1 = fptoui <64 x double> undef to <64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64f16_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64f16_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f16_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64f16_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f16_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128f32_v128i8 = fptoui <128 x float> undef to <128 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i8 = fptoui <128 x double> undef to <128 x i8>
@@ -2477,6 +4882,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptoui <128 x half> undef to <128 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v128f32_v128i1 = fptoui <128 x float> undef to <128 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i1 = fptoui <128 x double> undef to <128 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v128f16_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v128f16_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128f16_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128f16_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v128f16_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
@@ -2492,6 +4912,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f16_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
@@ -2507,6 +4942,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f16_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
@@ -2522,6 +4972,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i1 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f16_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
@@ -2537,6 +5002,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i1 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i1 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f16_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f32_nxv16i8 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i8 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i8>
@@ -2552,6 +5032,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i1 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i1 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16f16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f16_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32f32_nxv32i8 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i8 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i8>
@@ -2567,6 +5062,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv32f32_nxv32i1 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i1 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32f16_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32f16_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32f16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f16_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64f32_nxv64i8 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i8 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i8>
@@ -2582,6 +5092,21 @@ define void @fptoui() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv64f32_nxv64i1 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i1 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv64f16_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv64f16_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64f16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_nxv64f16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv64f16_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64-LABEL: 'fptoui'
@@ -2600,6 +5125,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptoui <2 x half> undef to <2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptoui <2 x float> undef to <2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptoui <2 x double> undef to <2 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2i8.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2i16.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f16_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f16_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2i64.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f16_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2i1.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptoui <4 x float> undef to <4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptoui <4 x double> undef to <4 x i8>
@@ -2615,6 +5155,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptoui <4 x half> undef to <4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptoui <4 x float> undef to <4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptoui <4 x double> undef to <4 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f16_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f16_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptoui <8 x float> undef to <8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i8 = fptoui <8 x double> undef to <8 x i8>
@@ -2630,6 +5185,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptoui <8 x half> undef to <8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptoui <8 x float> undef to <8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i1 = fptoui <8 x double> undef to <8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8i8.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8i16.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f16_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8i64.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f16_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8i1.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i8 = fptoui <16 x float> undef to <16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i8 = fptoui <16 x double> undef to <16 x i8>
@@ -2645,6 +5215,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptoui <16 x half> undef to <16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i1 = fptoui <16 x float> undef to <16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i1 = fptoui <16 x double> undef to <16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16i8.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16i16.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f16_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f16_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16i64.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f16_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16i1.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32f32_v32i8 = fptoui <32 x float> undef to <32 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i8 = fptoui <32 x double> undef to <32 x i8>
@@ -2660,6 +5245,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptoui <32 x half> undef to <32 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i1 = fptoui <32 x float> undef to <32 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i1 = fptoui <32 x double> undef to <32 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32i8.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32i16.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f16_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32i32.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32f16_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f16_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32i1.v32f64(<32 x double> undef, <32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64f32_v64i8 = fptoui <64 x float> undef to <64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i8 = fptoui <64 x double> undef to <64 x i8>
@@ -2675,6 +5275,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptoui <64 x half> undef to <64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v64f32_v64i1 = fptoui <64 x float> undef to <64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i1 = fptoui <64 x double> undef to <64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64f16_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64i8.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64f16_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64i16.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f16_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64i32.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64f16_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64i64.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64f16_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64i1.v64f64(<64 x double> undef, <64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128f32_v128i8 = fptoui <128 x float> undef to <128 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i8 = fptoui <128 x double> undef to <128 x i8>
@@ -2690,6 +5305,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptoui <128 x half> undef to <128 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v128f32_v128i1 = fptoui <128 x float> undef to <128 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i1 = fptoui <128 x double> undef to <128 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v128f16_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128i8.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v128f16_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128i16.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128f16_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128i32.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128f16_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128i64.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v128f16_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128i1.v128f64(<128 x double> undef, <128 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
@@ -2705,6 +5335,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1i8.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1i16.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1i64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f16_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1i1.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
@@ -2720,6 +5365,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2i8.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2i16.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2i64.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f16_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
@@ -2735,6 +5395,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i1 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4i8.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4i16.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4i64.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f16_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4i1.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
@@ -2750,6 +5425,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i1 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i1 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8i8.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8i16.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8i64.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f16_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8i1.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16f32_nxv16i8 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i8 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i8>
@@ -2765,6 +5455,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i1 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i1 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16i8.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16i16.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16i32.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16f16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16i64.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f16_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16i1.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32f32_nxv32i8 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i8 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i8>
@@ -2780,6 +5485,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv32f32_nxv32i1 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i1 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32f16_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32i8.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32f16_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32i16.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32i32.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32f16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32i64.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32f16_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32i1.nxv32f64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64f32_nxv64i8 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i8 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i8>
@@ -2795,6 +5515,21 @@ define void @fptoui() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv64f32_nxv64i1 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i1 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv64f16_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64i8.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv64f16_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64i16.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64f16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64i32.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64f16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64i64.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv64f16_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64i1.nxv64f64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f16_v2i8 = fptoui <2 x half> undef to <2 x i8>
@@ -2813,6 +5548,22 @@ define void @fptoui() {
   %v2f32_v2i1 = fptoui <2 x float> undef to <2 x i1>
   %v2f64_v2i1 = fptoui <2 x double> undef to <2 x i1>
 
+  %vp_v2f16_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2half.v2i8(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2float.v2i8(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2i8 = call <2 x i8> @llvm.vp.fptoui.v2double.v2i8(<2 x double> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f16_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2half.v2i16(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2float.v2i16(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2i16 = call <2 x i16> @llvm.vp.fptoui.v2double.v2i16(<2 x double> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f16_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2half.v2i32(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2float.v2i32(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2i32 = call <2 x i32> @llvm.vp.fptoui.v2double.v2i32(<2 x double> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f16_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2half.v2i64(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2float.v2i64(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2i64 = call <2 x i64> @llvm.vp.fptoui.v2double.v2i64(<2 x double> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f16_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2half.v2i1(<2 x half> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f32_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2float.v2i1(<2 x float> undef, <2 x i1> undef, i32 undef)
+  %vp_v2f64_v2i1 = call <2 x i1> @llvm.vp.fptoui.v2double.v2i1(<2 x double> undef, <2 x i1> undef, i32 undef)
+
   %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8>
   %v4f32_v4i8 = fptoui <4 x float> undef to <4 x i8>
   %v4f64_v4i8 = fptoui <4 x double> undef to <4 x i8>
@@ -2829,6 +5580,22 @@ define void @fptoui() {
   %v4f32_v4i1 = fptoui <4 x float> undef to <4 x i1>
   %v4f64_v4i1 = fptoui <4 x double> undef to <4 x i1>
 
+  %vp_v4f16_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4half.v4i8(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4float.v4i8(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4i8 = call <4 x i8> @llvm.vp.fptoui.v4double.v4i8(<4 x double> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f16_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4half.v4i16(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4float.v4i16(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4i16 = call <4 x i16> @llvm.vp.fptoui.v4double.v4i16(<4 x double> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f16_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4half.v4i32(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4float.v4i32(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4i32 = call <4 x i32> @llvm.vp.fptoui.v4double.v4i32(<4 x double> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f16_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4half.v4i64(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4float.v4i64(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4i64 = call <4 x i64> @llvm.vp.fptoui.v4double.v4i64(<4 x double> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f16_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4half.v4i1(<4 x half> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f32_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4float.v4i1(<4 x float> undef, <4 x i1> undef, i32 undef)
+  %vp_v4f64_v4i1 = call <4 x i1> @llvm.vp.fptoui.v4double.v4i1(<4 x double> undef, <4 x i1> undef, i32 undef)
+
   %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8>
   %v8f32_v8i8 = fptoui <8 x float> undef to <8 x i8>
   %v8f64_v8i8 = fptoui <8 x double> undef to <8 x i8>
@@ -2845,6 +5612,22 @@ define void @fptoui() {
   %v8f32_v8i1 = fptoui <8 x float> undef to <8 x i1>
   %v8f64_v8i1 = fptoui <8 x double> undef to <8 x i1>
 
+  %vp_v8f16_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8half.v8i8(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8float.v8i8(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8i8 = call <8 x i8> @llvm.vp.fptoui.v8double.v8i8(<8 x double> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f16_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8half.v8i16(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8float.v8i16(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8i16 = call <8 x i16> @llvm.vp.fptoui.v8double.v8i16(<8 x double> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f16_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8half.v8i32(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8float.v8i32(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8i32 = call <8 x i32> @llvm.vp.fptoui.v8double.v8i32(<8 x double> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f16_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8half.v8i64(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8float.v8i64(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8i64 = call <8 x i64> @llvm.vp.fptoui.v8double.v8i64(<8 x double> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f16_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8half.v8i1(<8 x half> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f32_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8float.v8i1(<8 x float> undef, <8 x i1> undef, i32 undef)
+  %vp_v8f64_v8i1 = call <8 x i1> @llvm.vp.fptoui.v8double.v8i1(<8 x double> undef, <8 x i1> undef, i32 undef)
+
   %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8>
   %v16f32_v16i8 = fptoui <16 x float> undef to <16 x i8>
   %v16f64_v16i8 = fptoui <16 x double> undef to <16 x i8>
@@ -2861,6 +5644,22 @@ define void @fptoui() {
   %v16f32_v16i1 = fptoui <16 x float> undef to <16 x i1>
   %v16f64_v16i1 = fptoui <16 x double> undef to <16 x i1>
 
+  %vp_v16f16_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16half.v16i8(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16float.v16i8(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16i8 = call <16 x i8> @llvm.vp.fptoui.v16double.v16i8(<16 x double> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f16_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16half.v16i16(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16float.v16i16(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16i16 = call <16 x i16> @llvm.vp.fptoui.v16double.v16i16(<16 x double> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f16_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16half.v16i32(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16float.v16i32(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16i32 = call <16 x i32> @llvm.vp.fptoui.v16double.v16i32(<16 x double> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f16_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16half.v16i64(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16float.v16i64(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16i64 = call <16 x i64> @llvm.vp.fptoui.v16double.v16i64(<16 x double> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f16_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16half.v16i1(<16 x half> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f32_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16float.v16i1(<16 x float> undef, <16 x i1> undef, i32 undef)
+  %vp_v16f64_v16i1 = call <16 x i1> @llvm.vp.fptoui.v16double.v16i1(<16 x double> undef, <16 x i1> undef, i32 undef)
+
   %v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8>
   %v32f32_v32i8 = fptoui <32 x float> undef to <32 x i8>
   %v32f64_v32i8 = fptoui <32 x double> undef to <32 x i8>
@@ -2877,6 +5676,22 @@ define void @fptoui() {
   %v32f32_v32i1 = fptoui <32 x float> undef to <32 x i1>
   %v32f64_v32i1 = fptoui <32 x double> undef to <32 x i1>
 
+  %vp_v32f16_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32half.v32i8(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32float.v32i8(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32i8 = call <32 x i8> @llvm.vp.fptoui.v32double.v32i8(<32 x double> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f16_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32half.v32i16(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32float.v32i16(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32i16 = call <32 x i16> @llvm.vp.fptoui.v32double.v32i16(<32 x double> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f16_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32half.v32i32(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32float.v32i32(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32i32 = call <32 x i32> @llvm.vp.fptoui.v32double.v32i32(<32 x double> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f16_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32half.v32i64(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32float.v32i64(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32i64 = call <32 x i64> @llvm.vp.fptoui.v32double.v32i64(<32 x double> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f16_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32half.v32i1(<32 x half> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f32_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32float.v32i1(<32 x float> undef, <32 x i1> undef, i32 undef)
+  %vp_v32f64_v32i1 = call <32 x i1> @llvm.vp.fptoui.v32double.v32i1(<32 x double> undef, <32 x i1> undef, i32 undef)
+
   %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8>
   %v64f32_v64i8 = fptoui <64 x float> undef to <64 x i8>
   %v64f64_v64i8 = fptoui <64 x double> undef to <64 x i8>
@@ -2893,6 +5708,22 @@ define void @fptoui() {
   %v64f32_v64i1 = fptoui <64 x float> undef to <64 x i1>
   %v64f64_v64i1 = fptoui <64 x double> undef to <64 x i1>
 
+  %vp_v64f16_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64half.v64i8(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64float.v64i8(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64i8 = call <64 x i8> @llvm.vp.fptoui.v64double.v64i8(<64 x double> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f16_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64half.v64i16(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64float.v64i16(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64i16 = call <64 x i16> @llvm.vp.fptoui.v64double.v64i16(<64 x double> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f16_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64half.v64i32(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64float.v64i32(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64i32 = call <64 x i32> @llvm.vp.fptoui.v64double.v64i32(<64 x double> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f16_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64half.v64i64(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64float.v64i64(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64i64 = call <64 x i64> @llvm.vp.fptoui.v64double.v64i64(<64 x double> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f16_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64half.v64i1(<64 x half> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f32_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64float.v64i1(<64 x float> undef, <64 x i1> undef, i32 undef)
+  %vp_v64f64_v64i1 = call <64 x i1> @llvm.vp.fptoui.v64double.v64i1(<64 x double> undef, <64 x i1> undef, i32 undef)
+
   %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8>
   %v128f32_v128i8 = fptoui <128 x float> undef to <128 x i8>
   %v128f64_v128i8 = fptoui <128 x double> undef to <128 x i8>
@@ -2909,6 +5740,22 @@ define void @fptoui() {
   %v128f32_v128i1 = fptoui <128 x float> undef to <128 x i1>
   %v128f64_v128i1 = fptoui <128 x double> undef to <128 x i1>
 
+  %vp_v128f16_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128half.v128i8(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128float.v128i8(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128i8 = call <128 x i8> @llvm.vp.fptoui.v128double.v128i8(<128 x double> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f16_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128half.v128i16(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128float.v128i16(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128i16 = call <128 x i16> @llvm.vp.fptoui.v128double.v128i16(<128 x double> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f16_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128half.v128i32(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128float.v128i32(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128i32 = call <128 x i32> @llvm.vp.fptoui.v128double.v128i32(<128 x double> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f16_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128half.v128i64(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128float.v128i64(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128i64 = call <128 x i64> @llvm.vp.fptoui.v128double.v128i64(<128 x double> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f16_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128half.v128i1(<128 x half> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f32_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128float.v128i1(<128 x float> undef, <128 x i1> undef, i32 undef)
+  %vp_v128f64_v128i1 = call <128 x i1> @llvm.vp.fptoui.v128double.v128i1(<128 x double> undef, <128 x i1> undef, i32 undef)
+
   %nxv1f16_nxv1i8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
   %nxv1f32_nxv1i8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
   %nxv1f64_nxv1i8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
@@ -2925,6 +5772,22 @@ define void @fptoui() {
   %nxv1f32_nxv1i1 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i1>
   %nxv1f64_nxv1i1 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i1>
 
+  %vp_nxv1f16_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1half.nxv1i8(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1float.nxv1i8(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1i8 = call <vscale x 1 x i8> @llvm.vp.fptoui.nxv1double.nxv1i8(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f16_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1half.nxv1i16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1float.nxv1i16(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1i16 = call <vscale x 1 x i16> @llvm.vp.fptoui.nxv1double.nxv1i16(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f16_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1half.nxv1i32(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1float.nxv1i32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1i32 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1double.nxv1i32(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f16_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1half.nxv1i64(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1float.nxv1i64(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1i64 = call <vscale x 1 x i64> @llvm.vp.fptoui.nxv1double.nxv1i64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f16_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1half.nxv1i1(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f32_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1float.nxv1i1(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1f64_nxv1i1 = call <vscale x 1 x i1> @llvm.vp.fptoui.nxv1double.nxv1i1(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+
   %nxv2f16_nxv2i8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
   %nxv2f32_nxv2i8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
   %nxv2f64_nxv2i8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
@@ -2941,6 +5804,22 @@ define void @fptoui() {
   %nxv2f32_nxv2i1 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i1>
   %nxv2f64_nxv2i1 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i1>
 
+  %vp_nxv2f16_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2half.nxv2i8(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2float.nxv2i8(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2i8 = call <vscale x 2 x i8> @llvm.vp.fptoui.nxv2double.nxv2i8(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f16_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2half.nxv2i16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2float.nxv2i16(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2i16 = call <vscale x 2 x i16> @llvm.vp.fptoui.nxv2double.nxv2i16(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f16_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2half.nxv2i32(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2float.nxv2i32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2i32 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2double.nxv2i32(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f16_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2half.nxv2i64(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2float.nxv2i64(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2i64 = call <vscale x 2 x i64> @llvm.vp.fptoui.nxv2double.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f16_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2half.nxv2i1(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f32_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2float.nxv2i1(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2f64_nxv2i1 = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2double.nxv2i1(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+
   %nxv4f16_nxv4i8 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
   %nxv4f32_nxv4i8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
   %nxv4f64_nxv4i8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
@@ -2957,6 +5836,22 @@ define void @fptoui() {
   %nxv4f32_nxv4i1 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i1>
   %nxv4f64_nxv4i1 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i1>
 
+  %vp_nxv4f16_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4half.nxv4i8(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4float.nxv4i8(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4i8 = call <vscale x 4 x i8> @llvm.vp.fptoui.nxv4double.nxv4i8(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f16_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4half.nxv4i16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4float.nxv4i16(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4i16 = call <vscale x 4 x i16> @llvm.vp.fptoui.nxv4double.nxv4i16(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f16_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4half.nxv4i32(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4float.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4i32 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4double.nxv4i32(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f16_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4half.nxv4i64(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4float.nxv4i64(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4i64 = call <vscale x 4 x i64> @llvm.vp.fptoui.nxv4double.nxv4i64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f16_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4half.nxv4i1(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f32_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4float.nxv4i1(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4f64_nxv4i1 = call <vscale x 4 x i1> @llvm.vp.fptoui.nxv4double.nxv4i1(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+
   %nxv8f16_nxv8i8 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
   %nxv8f32_nxv8i8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
   %nxv8f64_nxv8i8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
@@ -2973,6 +5868,22 @@ define void @fptoui() {
   %nxv8f32_nxv8i1 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i1>
   %nxv8f64_nxv8i1 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i1>
 
+  %vp_nxv8f16_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8half.nxv8i8(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8float.nxv8i8(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8i8 = call <vscale x 8 x i8> @llvm.vp.fptoui.nxv8double.nxv8i8(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f16_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8half.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8float.nxv8i16(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8i16 = call <vscale x 8 x i16> @llvm.vp.fptoui.nxv8double.nxv8i16(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f16_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8half.nxv8i32(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8float.nxv8i32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8i32 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8double.nxv8i32(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f16_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8half.nxv8i64(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8float.nxv8i64(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8i64 = call <vscale x 8 x i64> @llvm.vp.fptoui.nxv8double.nxv8i64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f16_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8half.nxv8i1(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f32_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8float.nxv8i1(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8f64_nxv8i1 = call <vscale x 8 x i1> @llvm.vp.fptoui.nxv8double.nxv8i1(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+
   %nxv16f16_nxv16i8 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i8>
   %nxv16f32_nxv16i8 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i8>
   %nxv16f64_nxv16i8 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i8>
@@ -2989,6 +5900,22 @@ define void @fptoui() {
   %nxv16f32_nxv16i1 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i1>
   %nxv16f64_nxv16i1 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i1>
 
+  %vp_nxv16f16_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16half.nxv16i8(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16float.nxv16i8(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16i8 = call <vscale x 16 x i8> @llvm.vp.fptoui.nxv16double.nxv16i8(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f16_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16half.nxv16i16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16float.nxv16i16(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16i16 = call <vscale x 16 x i16> @llvm.vp.fptoui.nxv16double.nxv16i16(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f16_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16half.nxv16i32(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16float.nxv16i32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16i32 = call <vscale x 16 x i32> @llvm.vp.fptoui.nxv16double.nxv16i32(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f16_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16half.nxv16i64(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16float.nxv16i64(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16i64 = call <vscale x 16 x i64> @llvm.vp.fptoui.nxv16double.nxv16i64(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f16_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16half.nxv16i1(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f32_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16float.nxv16i1(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16f64_nxv16i1 = call <vscale x 16 x i1> @llvm.vp.fptoui.nxv16double.nxv16i1(<vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
+
   %nxv32f16_nxv32i8 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i8>
   %nxv32f32_nxv32i8 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i8>
   %nxv32f64_nxv32i8 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i8>
@@ -3005,6 +5932,22 @@ define void @fptoui() {
   %nxv32f32_nxv32i1 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i1>
   %nxv32f64_nxv32i1 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i1>
 
+  %vp_nxv32f16_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32half.nxv32i8(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32float.nxv32i8(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32i8 = call <vscale x 32 x i8> @llvm.vp.fptoui.nxv32double.nxv32i8(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f16_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32half.nxv32i16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32float.nxv32i16(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32i16 = call <vscale x 32 x i16> @llvm.vp.fptoui.nxv32double.nxv32i16(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f16_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32half.nxv32i32(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32float.nxv32i32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32i32 = call <vscale x 32 x i32> @llvm.vp.fptoui.nxv32double.nxv32i32(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f16_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32half.nxv32i64(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32float.nxv32i64(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32i64 = call <vscale x 32 x i64> @llvm.vp.fptoui.nxv32double.nxv32i64(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f16_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32half.nxv32i1(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f32_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32float.nxv32i1(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32f64_nxv32i1 = call <vscale x 32 x i1> @llvm.vp.fptoui.nxv32double.nxv32i1(<vscale x 32 x double> undef, <vscale x 32 x i1> undef, i32 undef)
+
   %nxv64f16_nxv64i8 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i8>
   %nxv64f32_nxv64i8 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i8>
   %nxv64f64_nxv64i8 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i8>
@@ -3021,6 +5964,22 @@ define void @fptoui() {
   %nxv64f32_nxv64i1 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i1>
   %nxv64f64_nxv64i1 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i1>
 
+  %vp_nxv64f16_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64half.nxv64i8(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64float.nxv64i8(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64i8 = call <vscale x 64 x i8> @llvm.vp.fptoui.nxv64double.nxv64i8(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f16_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64half.nxv64i16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64float.nxv64i16(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64i16 = call <vscale x 64 x i16> @llvm.vp.fptoui.nxv64double.nxv64i16(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f16_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64half.nxv64i32(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64float.nxv64i32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64i32 = call <vscale x 64 x i32> @llvm.vp.fptoui.nxv64double.nxv64i32(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f16_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64half.nxv64i64(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64float.nxv64i64(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64i64 = call <vscale x 64 x i64> @llvm.vp.fptoui.nxv64double.nxv64i64(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f16_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64half.nxv64i1(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f32_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64float.nxv64i1(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64f64_nxv64i1 = call <vscale x 64 x i1> @llvm.vp.fptoui.nxv64double.nxv64i1(<vscale x 64 x double> undef, <vscale x 64 x i1> undef, i32 undef)
+
   ret void
 }
 
@@ -3041,6 +6000,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = sitofp <2 x i1> undef to <2 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = sitofp <2 x i1> undef to <2 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = sitofp <2 x i1> undef to <2 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi8_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi64_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
@@ -3056,6 +6030,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = sitofp <4 x i1> undef to <4 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = sitofp <4 x i1> undef to <4 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f64 = sitofp <4 x i1> undef to <4 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi8_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi64_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
@@ -3071,6 +6060,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = sitofp <8 x i1> undef to <8 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f32 = sitofp <8 x i1> undef to <8 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f64 = sitofp <8 x i1> undef to <8 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi8_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi16_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi64_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f64 = sitofp <16 x i8> undef to <16 x double>
@@ -3086,6 +6090,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = sitofp <16 x i1> undef to <16 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f32 = sitofp <16 x i1> undef to <16 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f64 = sitofp <16 x i1> undef to <16 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi8_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi16_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi64_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32f32 = sitofp <32 x i8> undef to <32 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_v32f64 = sitofp <32 x i8> undef to <32 x double>
@@ -3101,6 +6120,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = sitofp <32 x i1> undef to <32 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f32 = sitofp <32 x i1> undef to <32 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32i1_v32f64 = sitofp <32 x i1> undef to <32 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi8_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi16_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi32_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi64_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi1_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_v64f32 = sitofp <64 x i8> undef to <64 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v64i8_v64f64 = sitofp <64 x i8> undef to <64 x double>
@@ -3116,6 +6150,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v64i1_v64f32 = sitofp <64 x i1> undef to <64 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64i1_v64f64 = sitofp <64 x i1> undef to <64 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64fi8_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64fi16_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi32_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi64_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi1_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128i8_v128f32 = sitofp <128 x i8> undef to <128 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v128i8_v128f64 = sitofp <128 x i8> undef to <128 x double>
@@ -3131,6 +6180,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v128i1_v128f32 = sitofp <128 x i1> undef to <128 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128i1_v128f64 = sitofp <128 x i1> undef to <128 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v128fi8_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v128fi16_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128fi32_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128fi64_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v128fi1_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
@@ -3146,6 +6210,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi8_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f64 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
@@ -3161,6 +6240,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f64 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi8_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f32 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f64 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
@@ -3176,6 +6270,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f32 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f64 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi8_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi16_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f32 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f64 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
@@ -3191,6 +6300,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f32 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f64 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi8_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi16_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi16_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi16_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi64_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16f32 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i8_nxv16f64 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
@@ -3206,6 +6330,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f32 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_nxv16f64 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi8_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi16_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi1_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32i8_nxv32f32 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i8_nxv32f64 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
@@ -3221,6 +6360,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_nxv32f32 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32i1_nxv32f64 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32fi8_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32fi16_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi1_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64i8_nxv64f32 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %nxv64i8_nxv64f64 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
@@ -3236,6 +6390,21 @@ define void @sitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv64i1_nxv64f32 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64i1_nxv64f64 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv64fi8_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv64fi16_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64fi32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_nxv64fi64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv64fi1_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64-LABEL: 'sitofp'
@@ -3254,6 +6423,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = sitofp <2 x i1> undef to <2 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = sitofp <2 x i1> undef to <2 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = sitofp <2 x i1> undef to <2 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi8_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi64_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
@@ -3269,6 +6453,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = sitofp <4 x i1> undef to <4 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = sitofp <4 x i1> undef to <4 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f64 = sitofp <4 x i1> undef to <4 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi8_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi64_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
@@ -3284,6 +6483,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = sitofp <8 x i1> undef to <8 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f32 = sitofp <8 x i1> undef to <8 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f64 = sitofp <8 x i1> undef to <8 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi8_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi16_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi64_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f64 = sitofp <16 x i8> undef to <16 x double>
@@ -3299,6 +6513,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = sitofp <16 x i1> undef to <16 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f32 = sitofp <16 x i1> undef to <16 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f64 = sitofp <16 x i1> undef to <16 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi8_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi16_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi64_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32f32 = sitofp <32 x i8> undef to <32 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_v32f64 = sitofp <32 x i8> undef to <32 x double>
@@ -3314,6 +6543,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = sitofp <32 x i1> undef to <32 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f32 = sitofp <32 x i1> undef to <32 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32i1_v32f64 = sitofp <32 x i1> undef to <32 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi8_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi16_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi32_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi64_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi1_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_v64f32 = sitofp <64 x i8> undef to <64 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v64i8_v64f64 = sitofp <64 x i8> undef to <64 x double>
@@ -3329,6 +6573,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v64i1_v64f32 = sitofp <64 x i1> undef to <64 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64i1_v64f64 = sitofp <64 x i1> undef to <64 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64fi8_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64fi16_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi32_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi64_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi1_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128i8_v128f32 = sitofp <128 x i8> undef to <128 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v128i8_v128f64 = sitofp <128 x i8> undef to <128 x double>
@@ -3344,6 +6603,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v128i1_v128f32 = sitofp <128 x i1> undef to <128 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128i1_v128f64 = sitofp <128 x i1> undef to <128 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v128fi8_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v128fi16_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128fi32_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128fi64_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v128fi1_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
@@ -3359,6 +6633,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi8_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f64 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
@@ -3374,6 +6663,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f64 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi8_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f32 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f64 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
@@ -3389,6 +6693,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f32 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f64 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi8_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi16_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f32 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f64 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
@@ -3404,6 +6723,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f32 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f64 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi8_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi16_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi16_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi16_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi64_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16f32 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i8_nxv16f64 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
@@ -3419,6 +6753,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f32 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_nxv16f64 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi8_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi16_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi1_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32i8_nxv32f32 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i8_nxv32f64 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
@@ -3434,6 +6783,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_nxv32f32 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32i1_nxv32f64 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32fi8_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32fi16_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi1_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64i8_nxv64f32 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %nxv64i8_nxv64f64 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
@@ -3449,6 +6813,21 @@ define void @sitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv64i1_nxv64f32 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64i1_nxv64f64 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv64fi8_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv64fi16_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64fi32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64fi64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv64fi1_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2i8_v2f16 = sitofp <2 x i8> undef to <2 x half>
@@ -3467,6 +6846,22 @@ define void @sitofp() {
   %v2i1_v2f32 = sitofp <2 x i1> undef to <2 x float>
   %v2i1_v2f64 = sitofp <2 x i1> undef to <2 x double>
 
+  %vp_v2fi8_v2f16 = call <2 x half> @llvm.vp.sitofp.v2i8.v2half(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.sitofp.v2i8.v2float(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.sitofp.v2i8.v2double(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi16_v2f16 = call <2 x half> @llvm.vp.sitofp.v2i16.v2half(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.sitofp.v2i16.v2float(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.sitofp.v2i16.v2double(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi32_v2f16 = call <2 x half> @llvm.vp.sitofp.v2i32.v2half(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.sitofp.v2i32.v2float(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.sitofp.v2i32.v2double(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi64_v2f16 = call <2 x half> @llvm.vp.sitofp.v2i64.v2half(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.sitofp.v2i64.v2float(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.sitofp.v2i64.v2double(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi1_v2f16 = call <2 x half> @llvm.vp.sitofp.v2i1.v2half(<2 x i1> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.sitofp.v2i1.v2float(<2 x i1> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.sitofp.v2i1.v2double(<2 x i1> undef, <2 x i1> undef, i32 undef)
+
   %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half>
   %v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
   %v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
@@ -3483,6 +6878,22 @@ define void @sitofp() {
   %v4i1_v4f32 = sitofp <4 x i1> undef to <4 x float>
   %v4i1_v4f64 = sitofp <4 x i1> undef to <4 x double>
 
+  %vp_v4fi8_v4f16 = call <4 x half> @llvm.vp.sitofp.v4i8.v4half(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.sitofp.v4i8.v4float(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.sitofp.v4i8.v4double(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi16_v4f16 = call <4 x half> @llvm.vp.sitofp.v4i16.v4half(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.sitofp.v4i16.v4float(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.sitofp.v4i16.v4double(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi32_v4f16 = call <4 x half> @llvm.vp.sitofp.v4i32.v4half(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.sitofp.v4i32.v4float(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.sitofp.v4i32.v4double(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi64_v4f16 = call <4 x half> @llvm.vp.sitofp.v4i64.v4half(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.sitofp.v4i64.v4float(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.sitofp.v4i64.v4double(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi1_v4f16 = call <4 x half> @llvm.vp.sitofp.v4i1.v4half(<4 x i1> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.sitofp.v4i1.v4float(<4 x i1> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.sitofp.v4i1.v4double(<4 x i1> undef, <4 x i1> undef, i32 undef)
+
   %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half>
   %v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
   %v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
@@ -3499,6 +6910,22 @@ define void @sitofp() {
   %v8i1_v8f32 = sitofp <8 x i1> undef to <8 x float>
   %v8i1_v8f64 = sitofp <8 x i1> undef to <8 x double>
 
+  %vp_v8fi8_v8f16 = call <8 x half> @llvm.vp.sitofp.v8i8.v8half(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.sitofp.v8i8.v8float(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.sitofp.v8i8.v8double(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi16_v8f16 = call <8 x half> @llvm.vp.sitofp.v8i16.v8half(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.sitofp.v8i16.v8float(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.sitofp.v8i16.v8double(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi32_v8f16 = call <8 x half> @llvm.vp.sitofp.v8i32.v8half(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.sitofp.v8i32.v8float(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.sitofp.v8i32.v8double(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi64_v8f16 = call <8 x half> @llvm.vp.sitofp.v8i64.v8half(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.sitofp.v8i64.v8float(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.sitofp.v8i64.v8double(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi1_v8f16 = call <8 x half> @llvm.vp.sitofp.v8i1.v8half(<8 x i1> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.sitofp.v8i1.v8float(<8 x i1> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.sitofp.v8i1.v8double(<8 x i1> undef, <8 x i1> undef, i32 undef)
+
   %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half>
   %v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
   %v16i8_v16f64 = sitofp <16 x i8> undef to <16 x double>
@@ -3515,6 +6942,22 @@ define void @sitofp() {
   %v16i1_v16f32 = sitofp <16 x i1> undef to <16 x float>
   %v16i1_v16f64 = sitofp <16 x i1> undef to <16 x double>
 
+  %vp_v16fi8_v16f16 = call <16 x half> @llvm.vp.sitofp.v16i8.v16half(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.sitofp.v16i8.v16float(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.sitofp.v16i8.v16double(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi16_v16f16 = call <16 x half> @llvm.vp.sitofp.v16i16.v16half(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.sitofp.v16i16.v16float(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.sitofp.v16i16.v16double(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi32_v16f16 = call <16 x half> @llvm.vp.sitofp.v16i32.v16half(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.sitofp.v16i32.v16float(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.sitofp.v16i32.v16double(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi64_v16f16 = call <16 x half> @llvm.vp.sitofp.v16i64.v16half(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.sitofp.v16i64.v16float(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.sitofp.v16i64.v16double(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi1_v16f16 = call <16 x half> @llvm.vp.sitofp.v16i1.v16half(<16 x i1> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.sitofp.v16i1.v16float(<16 x i1> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.sitofp.v16i1.v16double(<16 x i1> undef, <16 x i1> undef, i32 undef)
+
   %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half>
   %v32i8_v32f32 = sitofp <32 x i8> undef to <32 x float>
   %v32i8_v32f64 = sitofp <32 x i8> undef to <32 x double>
@@ -3531,6 +6974,22 @@ define void @sitofp() {
   %v32i1_v32f32 = sitofp <32 x i1> undef to <32 x float>
   %v32i1_v32f64 = sitofp <32 x i1> undef to <32 x double>
 
+  %vp_v32fi8_v32f16 = call <32 x half> @llvm.vp.sitofp.v32i8.v32half(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.sitofp.v32i8.v32float(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.sitofp.v32i8.v32double(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi16_v32f16 = call <32 x half> @llvm.vp.sitofp.v32i16.v32half(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.sitofp.v32i16.v32float(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.sitofp.v32i16.v32double(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi32_v32f16 = call <32 x half> @llvm.vp.sitofp.v32i32.v32half(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.sitofp.v32i32.v32float(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.sitofp.v32i32.v32double(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi64_v32f16 = call <32 x half> @llvm.vp.sitofp.v32i64.v32half(<32 x i64> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.sitofp.v32i64.v32float(<32 x i64> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.sitofp.v32i64.v32double(<32 x i64> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi1_v32f16 = call <32 x half> @llvm.vp.sitofp.v32i1.v32half(<32 x i1> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.sitofp.v32i1.v32float(<32 x i1> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.sitofp.v32i1.v32double(<32 x i1> undef, <32 x i1> undef, i32 undef)
+
   %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half>
   %v64i8_v64f32 = sitofp <64 x i8> undef to <64 x float>
   %v64i8_v64f64 = sitofp <64 x i8> undef to <64 x double>
@@ -3547,6 +7006,22 @@ define void @sitofp() {
   %v64i1_v64f32 = sitofp <64 x i1> undef to <64 x float>
   %v64i1_v64f64 = sitofp <64 x i1> undef to <64 x double>
 
+  %vp_v64fi8_v64f16 = call <64 x half> @llvm.vp.sitofp.v64i8.v64half(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.sitofp.v64i8.v64float(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.sitofp.v64i8.v64double(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi16_v64f16 = call <64 x half> @llvm.vp.sitofp.v64i16.v64half(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.sitofp.v64i16.v64float(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.sitofp.v64i16.v64double(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi32_v64f16 = call <64 x half> @llvm.vp.sitofp.v64i32.v64half(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.sitofp.v64i32.v64float(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.sitofp.v64i32.v64double(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi64_v64f16 = call <64 x half> @llvm.vp.sitofp.v64i64.v64half(<64 x i64> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.sitofp.v64i64.v64float(<64 x i64> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.sitofp.v64i64.v64double(<64 x i64> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi1_v64f16 = call <64 x half> @llvm.vp.sitofp.v64i1.v64half(<64 x i1> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.sitofp.v64i1.v64float(<64 x i1> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.sitofp.v64i1.v64double(<64 x i1> undef, <64 x i1> undef, i32 undef)
+
   %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
   %v128i8_v128f32 = sitofp <128 x i8> undef to <128 x float>
   %v128i8_v128f64 = sitofp <128 x i8> undef to <128 x double>
@@ -3563,6 +7038,22 @@ define void @sitofp() {
   %v128i1_v128f32 = sitofp <128 x i1> undef to <128 x float>
   %v128i1_v128f64 = sitofp <128 x i1> undef to <128 x double>
 
+  %vp_v128fi8_v128f16 = call <128 x half> @llvm.vp.sitofp.v128i8.v128half(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.sitofp.v128i8.v128float(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.sitofp.v128i8.v128double(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi16_v128f16 = call <128 x half> @llvm.vp.sitofp.v128i16.v128half(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.sitofp.v128i16.v128float(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.sitofp.v128i16.v128double(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi32_v128f16 = call <128 x half> @llvm.vp.sitofp.v128i32.v128half(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.sitofp.v128i32.v128float(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.sitofp.v128i32.v128double(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi64_v128f16 = call <128 x half> @llvm.vp.sitofp.v128i64.v128half(<128 x i64> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.sitofp.v128i64.v128float(<128 x i64> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.sitofp.v128i64.v128double(<128 x i64> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi1_v128f16 = call <128 x half> @llvm.vp.sitofp.v128i1.v128half(<128 x i1> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.sitofp.v128i1.v128float(<128 x i1> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.sitofp.v128i1.v128double(<128 x i1> undef, <128 x i1> undef, i32 undef)
+
   %nxv1i8_nxv1f16 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
   %nxv1i8_nxv1f32 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
   %nxv1i8_nxv1f64 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
@@ -3579,6 +7070,22 @@ define void @sitofp() {
   %nxv1i1_nxv1f32 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
   %nxv1i1_nxv1f64 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
 
+  %vp_nxv1fi8_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1i8.nxv1half(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1i8.nxv1float(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i8.nxv1double(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi16_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1i16.nxv1half(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1i16.nxv1float(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i16.nxv1double(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1i32.nxv1half(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1i32.nxv1float(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i32.nxv1double(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1i64.nxv1half(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1i64.nxv1float(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i64.nxv1double(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi1_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1i1.nxv1half(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1i1.nxv1float(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i1.nxv1double(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+
   %nxv2i8_nxv2f16 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
   %nxv2i8_nxv2f32 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
   %nxv2i8_nxv2f64 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
@@ -3595,6 +7102,22 @@ define void @sitofp() {
   %nxv2i1_nxv2f32 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
   %nxv2i1_nxv2f64 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
 
+  %vp_nxv2fi8_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2i8.nxv2half(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2i8.nxv2float(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i8.nxv2double(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi16_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2i16.nxv2half(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2i16.nxv2float(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i16.nxv2double(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2i32.nxv2half(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2i32.nxv2float(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i32.nxv2double(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2i64.nxv2half(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2i64.nxv2float(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i64.nxv2double(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi1_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2i1.nxv2half(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2i1.nxv2float(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i1.nxv2double(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+
   %nxv4i8_nxv4f16 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
   %nxv4i8_nxv4f32 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
   %nxv4i8_nxv4f64 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
@@ -3611,6 +7134,22 @@ define void @sitofp() {
   %nxv4i1_nxv4f32 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
   %nxv4i1_nxv4f64 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
 
+  %vp_nxv4fi8_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4i8.nxv4half(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4i8.nxv4float(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i8.nxv4double(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi16_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4i16.nxv4half(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4i16.nxv4float(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i16.nxv4double(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4i32.nxv4half(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4i32.nxv4float(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i32.nxv4double(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4i64.nxv4half(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4i64.nxv4float(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i64.nxv4double(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi1_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4i1.nxv4half(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4i1.nxv4float(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i1.nxv4double(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+
   %nxv8i8_nxv8f16 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
   %nxv8i8_nxv8f32 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
   %nxv8i8_nxv8f64 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
@@ -3627,6 +7166,22 @@ define void @sitofp() {
   %nxv8i1_nxv8f32 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
   %nxv8i1_nxv8f64 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
 
+  %vp_nxv8fi8_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv8i8.nxv8half(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef) ; NOTE(review): every call in this nxv8 group is named nxv8 (value name and intrinsic suffix) but uses <vscale x 1 x ...> result, operand and mask types; presumably <vscale x 8 x ...> was intended. Confirm, then fix the types and regenerate the CHECK lines together.
+  %vp_nxv8fi8_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv8i8.nxv8float(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi8_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv8i8.nxv8double(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi16_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv8i16.nxv8half(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi16_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv8i16.nxv8float(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi16_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv8i16.nxv8double(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi32_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv8i32.nxv8half(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi32_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv8i32.nxv8float(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi32_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv8i32.nxv8double(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi64_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv8i64.nxv8half(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi64_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv8i64.nxv8float(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi64_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv8i64.nxv8double(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi1_nxv8f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv8i1.nxv8half(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi1_nxv8f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv8i1.nxv8float(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv8fi1_nxv8f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv8i1.nxv8double(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+
   %nxv16i8_nxv16f16 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
   %nxv16i8_nxv16f32 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
   %nxv16i8_nxv16f64 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
@@ -3643,6 +7198,22 @@ define void @sitofp() {
   %nxv16i1_nxv16f32 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
   %nxv16i1_nxv16f64 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
 
+  %vp_nxv16fi8_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16i8.nxv16half(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16i8.nxv16float(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16i8.nxv16double(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi16_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16i16.nxv16half(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16i16.nxv16float(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16i16.nxv16double(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16i32.nxv16half(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16i32.nxv16float(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16i32.nxv16double(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16i64.nxv16half(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16i64.nxv16float(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16i64.nxv16double(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi1_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16i1.nxv16half(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16i1.nxv16float(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16i1.nxv16double(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+
   %nxv32i8_nxv32f16 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
   %nxv32i8_nxv32f32 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
   %nxv32i8_nxv32f64 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
@@ -3659,6 +7230,22 @@ define void @sitofp() {
   %nxv32i1_nxv32f32 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
   %nxv32i1_nxv32f64 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
 
+  %vp_nxv32fi8_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32i8.nxv32half(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32i8.nxv32float(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32i8.nxv32double(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi16_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32i16.nxv32half(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32i16.nxv32float(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32i16.nxv32double(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32i32.nxv32half(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32i32.nxv32float(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32i32.nxv32double(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32i64.nxv32half(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32i64.nxv32float(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32i64.nxv32double(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi1_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32i1.nxv32half(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32i1.nxv32float(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32i1.nxv32double(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+
   %nxv64i8_nxv64f16 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
   %nxv64i8_nxv64f32 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
   %nxv64i8_nxv64f64 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
@@ -3675,6 +7262,22 @@ define void @sitofp() {
   %nxv64i1_nxv64f32 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
   %nxv64i1_nxv64f64 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
 
+  %vp_nxv64fi8_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64i8.nxv64half(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64i8.nxv64float(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64i8.nxv64double(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi16_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64i16.nxv64half(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64i16.nxv64float(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64i16.nxv64double(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64i32.nxv64half(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64i32.nxv64float(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64i32.nxv64double(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64i64.nxv64half(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64i64.nxv64float(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64i64.nxv64double(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi1_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64i1.nxv64half(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64i1.nxv64float(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64i1.nxv64double(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+
   ret void
 }
 
@@ -3695,6 +7298,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = uitofp <2 x i1> undef to <2 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = uitofp <2 x i1> undef to <2 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = uitofp <2 x i1> undef to <2 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi8_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi64_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
@@ -3710,6 +7328,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = uitofp <4 x i1> undef to <4 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = uitofp <4 x i1> undef to <4 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f64 = uitofp <4 x i1> undef to <4 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi8_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi64_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
@@ -3725,6 +7358,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = uitofp <8 x i1> undef to <8 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f32 = uitofp <8 x i1> undef to <8 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f64 = uitofp <8 x i1> undef to <8 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi8_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi16_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi64_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f64 = uitofp <16 x i8> undef to <16 x double>
@@ -3740,6 +7388,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = uitofp <16 x i1> undef to <16 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f32 = uitofp <16 x i1> undef to <16 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f64 = uitofp <16 x i1> undef to <16 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi8_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi16_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi64_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32f32 = uitofp <32 x i8> undef to <32 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_v32f64 = uitofp <32 x i8> undef to <32 x double>
@@ -3755,6 +7418,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = uitofp <32 x i1> undef to <32 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f32 = uitofp <32 x i1> undef to <32 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32i1_v32f64 = uitofp <32 x i1> undef to <32 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi8_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi16_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi32_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi64_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi1_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_v64f32 = uitofp <64 x i8> undef to <64 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v64i8_v64f64 = uitofp <64 x i8> undef to <64 x double>
@@ -3770,6 +7448,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v64i1_v64f32 = uitofp <64 x i1> undef to <64 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64i1_v64f64 = uitofp <64 x i1> undef to <64 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64fi8_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64fi16_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi32_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi64_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi1_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128i8_v128f32 = uitofp <128 x i8> undef to <128 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v128i8_v128f64 = uitofp <128 x i8> undef to <128 x double>
@@ -3785,6 +7478,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v128i1_v128f32 = uitofp <128 x i1> undef to <128 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128i1_v128f64 = uitofp <128 x i1> undef to <128 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v128fi8_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v128fi16_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128fi32_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128fi64_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v128fi1_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
@@ -3800,6 +7508,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi8_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f64 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
@@ -3815,6 +7538,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f64 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi8_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f32 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f64 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
@@ -3830,6 +7568,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f32 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f64 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi8_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi16_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f32 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f64 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
@@ -3845,6 +7598,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f32 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f64 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi8_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8f16.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8f32.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi16_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8f16.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8f32.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8f16.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8f32.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi64_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8f16.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8f32.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8f16.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8f32.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16f32 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i8_nxv16f64 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
@@ -3860,6 +7628,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f32 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_nxv16f64 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi8_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi16_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi1_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32i8_nxv32f32 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i8_nxv32f64 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
@@ -3875,6 +7658,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_nxv32f32 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32i1_nxv32f64 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32fi8_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32fi16_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi1_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64i8_nxv64f32 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %nxv64i8_nxv64f64 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
@@ -3890,6 +7688,21 @@ define void @uitofp() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv64i1_nxv64f32 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64i1_nxv64f64 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv64fi8_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv64fi16_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64fi32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_nxv64fi64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv64fi1_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64-LABEL: 'uitofp'
@@ -3908,6 +7721,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = uitofp <2 x i1> undef to <2 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = uitofp <2 x i1> undef to <2 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = uitofp <2 x i1> undef to <2 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi8_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i8(<2 x i8> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2fi64_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f16 = call <2 x half> @llvm.vp.sitofp.v2f16.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.sitofp.v2f32.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i1(<2 x i1> undef, <2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
@@ -3923,6 +7751,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = uitofp <4 x i1> undef to <4 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = uitofp <4 x i1> undef to <4 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f64 = uitofp <4 x i1> undef to <4 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi8_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4fi64_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f16 = call <4 x half> @llvm.vp.sitofp.v4f16.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.sitofp.v4f32.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i1(<4 x i1> undef, <4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
@@ -3938,6 +7781,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = uitofp <8 x i1> undef to <8 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f32 = uitofp <8 x i1> undef to <8 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f64 = uitofp <8 x i1> undef to <8 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi8_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i8(<8 x i8> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi16_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8fi64_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f16 = call <8 x half> @llvm.vp.sitofp.v8f16.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i1(<8 x i1> undef, <8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f64 = uitofp <16 x i8> undef to <16 x double>
@@ -3953,6 +7811,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = uitofp <16 x i1> undef to <16 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f32 = uitofp <16 x i1> undef to <16 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f64 = uitofp <16 x i1> undef to <16 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi8_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i8(<16 x i8> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi16_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16fi64_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f16 = call <16 x half> @llvm.vp.sitofp.v16f16.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.sitofp.v16f32.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i1(<16 x i1> undef, <16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32f32 = uitofp <32 x i8> undef to <32 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32i8_v32f64 = uitofp <32 x i8> undef to <32 x double>
@@ -3968,6 +7841,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = uitofp <32 x i1> undef to <32 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f32 = uitofp <32 x i1> undef to <32 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32i1_v32f64 = uitofp <32 x i1> undef to <32 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi8_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i8(<32 x i8> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi16_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i16(<32 x i16> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi32_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i32(<32 x i32> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v32fi64_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi1_v32f16 = call <32 x half> @llvm.vp.sitofp.v32f16.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.sitofp.v32f32.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.sitofp.v32f64.v32i1(<32 x i1> undef, <32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i8_v64f32 = uitofp <64 x i8> undef to <64 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v64i8_v64f64 = uitofp <64 x i8> undef to <64 x double>
@@ -3983,6 +7871,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v64i1_v64f32 = uitofp <64 x i1> undef to <64 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64i1_v64f64 = uitofp <64 x i1> undef to <64 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64fi8_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i8(<64 x i8> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v64fi16_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i16(<64 x i16> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi32_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i32(<64 x i32> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v64fi64_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i64(<64 x i64> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v64fi1_v64f16 = call <64 x half> @llvm.vp.sitofp.v64f16.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.sitofp.v64f32.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.sitofp.v64f64.v64i1(<64 x i1> undef, <64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128i8_v128f32 = uitofp <128 x i8> undef to <128 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v128i8_v128f64 = uitofp <128 x i8> undef to <128 x double>
@@ -3998,6 +7901,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v128i1_v128f32 = uitofp <128 x i1> undef to <128 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128i1_v128f64 = uitofp <128 x i1> undef to <128 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v128fi8_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i8(<128 x i8> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v128fi16_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i16(<128 x i16> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v128fi32_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i32(<128 x i32> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_v128fi64_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i64(<128 x i64> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_v128fi1_v128f16 = call <128 x half> @llvm.vp.sitofp.v128f16.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.sitofp.v128f32.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.sitofp.v128f64.v128i1(<128 x i1> undef, <128 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
@@ -4013,6 +7931,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi8_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1fi64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1f16.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1f32.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i1(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f64 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
@@ -4028,6 +7961,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f64 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi8_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2fi64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2f16.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i1(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f32 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f64 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
@@ -4043,6 +7991,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f32 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f64 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi8_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi16_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4fi64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4f16.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4f32.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f32 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f64 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
@@ -4058,6 +8021,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f32 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f64 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi8_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8f16.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8f32.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi8_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi16_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8f16.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8f32.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8f16.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8f32.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8fi64_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8f16.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8f32.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv8fi64_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8f16.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8f32.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv8fi1_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i1(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16f32 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i8_nxv16f64 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
@@ -4073,6 +8051,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f32 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_nxv16f64 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi8_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi16_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv16fi64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi1_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16f16.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16f32.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16f64.nxv16i1(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32i8_nxv32f32 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i8_nxv32f64 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
@@ -4088,6 +8081,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_nxv32f32 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32i1_nxv32f64 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32fi8_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv32fi16_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i32(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv32fi64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i64(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv32fi1_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32f16.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32f32.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32f64.nxv32i1(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64i8_nxv64f32 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %nxv64i8_nxv64f64 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
@@ -4103,6 +8111,21 @@ define void @uitofp() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv64i1_nxv64f32 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64i1_nxv64f64 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv64fi8_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv64fi16_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i16(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv64fi32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i32(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %vp_nxv64fi64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i64(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %vp_nxv64fi1_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64f16.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64f32.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64f64.nxv64i1(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2i8_v2f16 = uitofp <2 x i8> undef to <2 x half>
@@ -4121,6 +8144,22 @@ define void @uitofp() {
   %v2i1_v2f32 = uitofp <2 x i1> undef to <2 x float>
   %v2i1_v2f64 = uitofp <2 x i1> undef to <2 x double>
 
+  %vp_v2fi8_v2f16 = call <2 x half> @llvm.vp.sitofp.v2i8.v2half(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi8_v2f32 = call <2 x float> @llvm.vp.sitofp.v2i8.v2float(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi8_v2f64 = call <2 x double> @llvm.vp.sitofp.v2i8.v2double(<2 x i8> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi16_v2f16 = call <2 x half> @llvm.vp.sitofp.v2i16.v2half(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi16_v2f32 = call <2 x float> @llvm.vp.sitofp.v2i16.v2float(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi16_v2f64 = call <2 x double> @llvm.vp.sitofp.v2i16.v2double(<2 x i16> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi32_v2f16 = call <2 x half> @llvm.vp.sitofp.v2i32.v2half(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi32_v2f32 = call <2 x float> @llvm.vp.sitofp.v2i32.v2float(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi32_v2f64 = call <2 x double> @llvm.vp.sitofp.v2i32.v2double(<2 x i32> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi64_v2f16 = call <2 x half> @llvm.vp.sitofp.v2i64.v2half(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi64_v2f32 = call <2 x float> @llvm.vp.sitofp.v2i64.v2float(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi64_v2f64 = call <2 x double> @llvm.vp.sitofp.v2i64.v2double(<2 x i64> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi1_v2f16 = call <2 x half> @llvm.vp.sitofp.v2i1.v2half(<2 x i1> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi1_v2f32 = call <2 x float> @llvm.vp.sitofp.v2i1.v2float(<2 x i1> undef, <2 x i1> undef, i32 undef)
+  %vp_v2fi1_v2f64 = call <2 x double> @llvm.vp.sitofp.v2i1.v2double(<2 x i1> undef, <2 x i1> undef, i32 undef)
+
   %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half>
   %v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
   %v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
@@ -4137,6 +8176,22 @@ define void @uitofp() {
   %v4i1_v4f32 = uitofp <4 x i1> undef to <4 x float>
   %v4i1_v4f64 = uitofp <4 x i1> undef to <4 x double>
 
+  %vp_v4fi8_v4f16 = call <4 x half> @llvm.vp.sitofp.v4i8.v4half(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi8_v4f32 = call <4 x float> @llvm.vp.sitofp.v4i8.v4float(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi8_v4f64 = call <4 x double> @llvm.vp.sitofp.v4i8.v4double(<4 x i8> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi16_v4f16 = call <4 x half> @llvm.vp.sitofp.v4i16.v4half(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi16_v4f32 = call <4 x float> @llvm.vp.sitofp.v4i16.v4float(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi16_v4f64 = call <4 x double> @llvm.vp.sitofp.v4i16.v4double(<4 x i16> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi32_v4f16 = call <4 x half> @llvm.vp.sitofp.v4i32.v4half(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi32_v4f32 = call <4 x float> @llvm.vp.sitofp.v4i32.v4float(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi32_v4f64 = call <4 x double> @llvm.vp.sitofp.v4i32.v4double(<4 x i32> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi64_v4f16 = call <4 x half> @llvm.vp.sitofp.v4i64.v4half(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi64_v4f32 = call <4 x float> @llvm.vp.sitofp.v4i64.v4float(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi64_v4f64 = call <4 x double> @llvm.vp.sitofp.v4i64.v4double(<4 x i64> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi1_v4f16 = call <4 x half> @llvm.vp.sitofp.v4i1.v4half(<4 x i1> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi1_v4f32 = call <4 x float> @llvm.vp.sitofp.v4i1.v4float(<4 x i1> undef, <4 x i1> undef, i32 undef)
+  %vp_v4fi1_v4f64 = call <4 x double> @llvm.vp.sitofp.v4i1.v4double(<4 x i1> undef, <4 x i1> undef, i32 undef)
+
   %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half>
   %v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
   %v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
@@ -4153,6 +8208,22 @@ define void @uitofp() {
   %v8i1_v8f32 = uitofp <8 x i1> undef to <8 x float>
   %v8i1_v8f64 = uitofp <8 x i1> undef to <8 x double>
 
+  %vp_v8fi8_v8f16 = call <8 x half> @llvm.vp.sitofp.v8i8.v8half(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi8_v8f32 = call <8 x float> @llvm.vp.sitofp.v8i8.v8float(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi8_v8f64 = call <8 x double> @llvm.vp.sitofp.v8i8.v8double(<8 x i8> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi16_v8f16 = call <8 x half> @llvm.vp.sitofp.v8i16.v8half(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi16_v8f32 = call <8 x float> @llvm.vp.sitofp.v8i16.v8float(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi16_v8f64 = call <8 x double> @llvm.vp.sitofp.v8i16.v8double(<8 x i16> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi32_v8f16 = call <8 x half> @llvm.vp.sitofp.v8i32.v8half(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi32_v8f32 = call <8 x float> @llvm.vp.sitofp.v8i32.v8float(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi32_v8f64 = call <8 x double> @llvm.vp.sitofp.v8i32.v8double(<8 x i32> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi64_v8f16 = call <8 x half> @llvm.vp.sitofp.v8i64.v8half(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi64_v8f32 = call <8 x float> @llvm.vp.sitofp.v8i64.v8float(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi64_v8f64 = call <8 x double> @llvm.vp.sitofp.v8i64.v8double(<8 x i64> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi1_v8f16 = call <8 x half> @llvm.vp.sitofp.v8i1.v8half(<8 x i1> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi1_v8f32 = call <8 x float> @llvm.vp.sitofp.v8i1.v8float(<8 x i1> undef, <8 x i1> undef, i32 undef)
+  %vp_v8fi1_v8f64 = call <8 x double> @llvm.vp.sitofp.v8i1.v8double(<8 x i1> undef, <8 x i1> undef, i32 undef)
+
   %v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half>
   %v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
   %v16i8_v16f64 = uitofp <16 x i8> undef to <16 x double>
@@ -4169,6 +8240,22 @@ define void @uitofp() {
   %v16i1_v16f32 = uitofp <16 x i1> undef to <16 x float>
   %v16i1_v16f64 = uitofp <16 x i1> undef to <16 x double>
 
+  %vp_v16fi8_v16f16 = call <16 x half> @llvm.vp.sitofp.v16i8.v16half(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi8_v16f32 = call <16 x float> @llvm.vp.sitofp.v16i8.v16float(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi8_v16f64 = call <16 x double> @llvm.vp.sitofp.v16i8.v16double(<16 x i8> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi16_v16f16 = call <16 x half> @llvm.vp.sitofp.v16i16.v16half(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi16_v16f32 = call <16 x float> @llvm.vp.sitofp.v16i16.v16float(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi16_v16f64 = call <16 x double> @llvm.vp.sitofp.v16i16.v16double(<16 x i16> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi32_v16f16 = call <16 x half> @llvm.vp.sitofp.v16i32.v16half(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi32_v16f32 = call <16 x float> @llvm.vp.sitofp.v16i32.v16float(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi32_v16f64 = call <16 x double> @llvm.vp.sitofp.v16i32.v16double(<16 x i32> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi64_v16f16 = call <16 x half> @llvm.vp.sitofp.v16i64.v16half(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi64_v16f32 = call <16 x float> @llvm.vp.sitofp.v16i64.v16float(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi64_v16f64 = call <16 x double> @llvm.vp.sitofp.v16i64.v16double(<16 x i64> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi1_v16f16 = call <16 x half> @llvm.vp.sitofp.v16i1.v16half(<16 x i1> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi1_v16f32 = call <16 x float> @llvm.vp.sitofp.v16i1.v16float(<16 x i1> undef, <16 x i1> undef, i32 undef)
+  %vp_v16fi1_v16f64 = call <16 x double> @llvm.vp.sitofp.v16i1.v16double(<16 x i1> undef, <16 x i1> undef, i32 undef)
+
   %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half>
   %v32i8_v32f32 = uitofp <32 x i8> undef to <32 x float>
   %v32i8_v32f64 = uitofp <32 x i8> undef to <32 x double>
@@ -4185,6 +8272,22 @@ define void @uitofp() {
   %v32i1_v32f32 = uitofp <32 x i1> undef to <32 x float>
   %v32i1_v32f64 = uitofp <32 x i1> undef to <32 x double>
 
+  %vp_v32fi8_v32f16 = call <32 x half> @llvm.vp.sitofp.v32i8.v32half(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi8_v32f32 = call <32 x float> @llvm.vp.sitofp.v32i8.v32float(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi8_v32f64 = call <32 x double> @llvm.vp.sitofp.v32i8.v32double(<32 x i8> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi16_v32f16 = call <32 x half> @llvm.vp.sitofp.v32i16.v32half(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi16_v32f32 = call <32 x float> @llvm.vp.sitofp.v32i16.v32float(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi16_v32f64 = call <32 x double> @llvm.vp.sitofp.v32i16.v32double(<32 x i16> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi32_v32f16 = call <32 x half> @llvm.vp.sitofp.v32i32.v32half(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi32_v32f32 = call <32 x float> @llvm.vp.sitofp.v32i32.v32float(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi32_v32f64 = call <32 x double> @llvm.vp.sitofp.v32i32.v32double(<32 x i32> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi64_v32f16 = call <32 x half> @llvm.vp.sitofp.v32i64.v32half(<32 x i64> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi64_v32f32 = call <32 x float> @llvm.vp.sitofp.v32i64.v32float(<32 x i64> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi64_v32f64 = call <32 x double> @llvm.vp.sitofp.v32i64.v32double(<32 x i64> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi1_v32f16 = call <32 x half> @llvm.vp.sitofp.v32i1.v32half(<32 x i1> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi1_v32f32 = call <32 x float> @llvm.vp.sitofp.v32i1.v32float(<32 x i1> undef, <32 x i1> undef, i32 undef)
+  %vp_v32fi1_v32f64 = call <32 x double> @llvm.vp.sitofp.v32i1.v32double(<32 x i1> undef, <32 x i1> undef, i32 undef)
+
   %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half>
   %v64i8_v64f32 = uitofp <64 x i8> undef to <64 x float>
   %v64i8_v64f64 = uitofp <64 x i8> undef to <64 x double>
@@ -4201,6 +8304,22 @@ define void @uitofp() {
   %v64i1_v64f32 = uitofp <64 x i1> undef to <64 x float>
   %v64i1_v64f64 = uitofp <64 x i1> undef to <64 x double>
 
+  %vp_v64fi8_v64f16 = call <64 x half> @llvm.vp.sitofp.v64i8.v64half(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi8_v64f32 = call <64 x float> @llvm.vp.sitofp.v64i8.v64float(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi8_v64f64 = call <64 x double> @llvm.vp.sitofp.v64i8.v64double(<64 x i8> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi16_v64f16 = call <64 x half> @llvm.vp.sitofp.v64i16.v64half(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi16_v64f32 = call <64 x float> @llvm.vp.sitofp.v64i16.v64float(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi16_v64f64 = call <64 x double> @llvm.vp.sitofp.v64i16.v64double(<64 x i16> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi32_v64f16 = call <64 x half> @llvm.vp.sitofp.v64i32.v64half(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi32_v64f32 = call <64 x float> @llvm.vp.sitofp.v64i32.v64float(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi32_v64f64 = call <64 x double> @llvm.vp.sitofp.v64i32.v64double(<64 x i32> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi64_v64f16 = call <64 x half> @llvm.vp.sitofp.v64i64.v64half(<64 x i64> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi64_v64f32 = call <64 x float> @llvm.vp.sitofp.v64i64.v64float(<64 x i64> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi64_v64f64 = call <64 x double> @llvm.vp.sitofp.v64i64.v64double(<64 x i64> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi1_v64f16 = call <64 x half> @llvm.vp.sitofp.v64i1.v64half(<64 x i1> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi1_v64f32 = call <64 x float> @llvm.vp.sitofp.v64i1.v64float(<64 x i1> undef, <64 x i1> undef, i32 undef)
+  %vp_v64fi1_v64f64 = call <64 x double> @llvm.vp.sitofp.v64i1.v64double(<64 x i1> undef, <64 x i1> undef, i32 undef)
+
   %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half>
   %v128i8_v128f32 = uitofp <128 x i8> undef to <128 x float>
   %v128i8_v128f64 = uitofp <128 x i8> undef to <128 x double>
@@ -4217,6 +8336,22 @@ define void @uitofp() {
   %v128i1_v128f32 = uitofp <128 x i1> undef to <128 x float>
   %v128i1_v128f64 = uitofp <128 x i1> undef to <128 x double>
 
+  %vp_v128fi8_v128f16 = call <128 x half> @llvm.vp.sitofp.v128i8.v128half(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi8_v128f32 = call <128 x float> @llvm.vp.sitofp.v128i8.v128float(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi8_v128f64 = call <128 x double> @llvm.vp.sitofp.v128i8.v128double(<128 x i8> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi16_v128f16 = call <128 x half> @llvm.vp.sitofp.v128i16.v128half(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi16_v128f32 = call <128 x float> @llvm.vp.sitofp.v128i16.v128float(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi16_v128f64 = call <128 x double> @llvm.vp.sitofp.v128i16.v128double(<128 x i16> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi32_v128f16 = call <128 x half> @llvm.vp.sitofp.v128i32.v128half(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi32_v128f32 = call <128 x float> @llvm.vp.sitofp.v128i32.v128float(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi32_v128f64 = call <128 x double> @llvm.vp.sitofp.v128i32.v128double(<128 x i32> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi64_v128f16 = call <128 x half> @llvm.vp.sitofp.v128i64.v128half(<128 x i64> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi64_v128f32 = call <128 x float> @llvm.vp.sitofp.v128i64.v128float(<128 x i64> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi64_v128f64 = call <128 x double> @llvm.vp.sitofp.v128i64.v128double(<128 x i64> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi1_v128f16 = call <128 x half> @llvm.vp.sitofp.v128i1.v128half(<128 x i1> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi1_v128f32 = call <128 x float> @llvm.vp.sitofp.v128i1.v128float(<128 x i1> undef, <128 x i1> undef, i32 undef)
+  %vp_v128fi1_v128f64 = call <128 x double> @llvm.vp.sitofp.v128i1.v128double(<128 x i1> undef, <128 x i1> undef, i32 undef)
+
   %nxv1i8_nxv1f16 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
   %nxv1i8_nxv1f32 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
   %nxv1i8_nxv1f64 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
@@ -4233,6 +8368,22 @@ define void @uitofp() {
   %nxv1i1_nxv1f32 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
   %nxv1i1_nxv1f64 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
 
+  %vp_nxv1fi8_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1i8.nxv1half(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi8_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1i8.nxv1float(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi8_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i8.nxv1double(<vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi16_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1i16.nxv1half(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi16_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1i16.nxv1float(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i16.nxv1double(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi32_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1i32.nxv1half(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi32_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1i32.nxv1float(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i32.nxv1double(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi64_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1i64.nxv1half(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi64_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1i64.nxv1float(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi64_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i64.nxv1double(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi1_nxv1f16 = call <vscale x 1 x half> @llvm.vp.sitofp.nxv1i1.nxv1half(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi1_nxv1f32 = call <vscale x 1 x float> @llvm.vp.sitofp.nxv1i1.nxv1float(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+  %vp_nxv1fi1_nxv1f64 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i1.nxv1double(<vscale x 1 x i1> undef, <vscale x 1 x i1> undef, i32 undef)
+
   %nxv2i8_nxv2f16 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
   %nxv2i8_nxv2f32 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
   %nxv2i8_nxv2f64 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
@@ -4249,6 +8400,22 @@ define void @uitofp() {
   %nxv2i1_nxv2f32 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
   %nxv2i1_nxv2f64 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
 
+  %vp_nxv2fi8_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2i8.nxv2half(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi8_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2i8.nxv2float(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi8_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i8.nxv2double(<vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi16_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2i16.nxv2half(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2i16.nxv2float(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i16.nxv2double(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi32_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2i32.nxv2half(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi32_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2i32.nxv2float(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i32.nxv2double(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi64_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2i64.nxv2half(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi64_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2i64.nxv2float(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi64_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i64.nxv2double(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi1_nxv2f16 = call <vscale x 2 x half> @llvm.vp.sitofp.nxv2i1.nxv2half(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi1_nxv2f32 = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2i1.nxv2float(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+  %vp_nxv2fi1_nxv2f64 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i1.nxv2double(<vscale x 2 x i1> undef, <vscale x 2 x i1> undef, i32 undef)
+
   %nxv4i8_nxv4f16 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
   %nxv4i8_nxv4f32 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
   %nxv4i8_nxv4f64 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
@@ -4265,6 +8432,22 @@ define void @uitofp() {
   %nxv4i1_nxv4f32 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
   %nxv4i1_nxv4f64 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
 
+  %vp_nxv4fi8_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4i8.nxv4half(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi8_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4i8.nxv4float(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi8_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i8.nxv4double(<vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi16_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4i16.nxv4half(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4i16.nxv4float(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i16.nxv4double(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi32_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4i32.nxv4half(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi32_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4i32.nxv4float(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i32.nxv4double(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi64_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4i64.nxv4half(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi64_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4i64.nxv4float(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi64_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i64.nxv4double(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi1_nxv4f16 = call <vscale x 4 x half> @llvm.vp.sitofp.nxv4i1.nxv4half(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi1_nxv4f32 = call <vscale x 4 x float> @llvm.vp.sitofp.nxv4i1.nxv4float(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+  %vp_nxv4fi1_nxv4f64 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i1.nxv4double(<vscale x 4 x i1> undef, <vscale x 4 x i1> undef, i32 undef)
+
   %nxv8i8_nxv8f16 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
   %nxv8i8_nxv8f32 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
   %nxv8i8_nxv8f64 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
@@ -4281,6 +8464,22 @@ define void @uitofp() {
   %nxv8i1_nxv8f32 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
   %nxv8i1_nxv8f64 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
 
+  %vp_nxv8fi8_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8i8.nxv8half(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi8_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8i8.nxv8float(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi8_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8i8.nxv8double(<vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi16_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8i16.nxv8half(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8i16.nxv8float(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8i16.nxv8double(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi32_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8i32.nxv8half(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi32_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8i32.nxv8float(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8i32.nxv8double(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi64_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8i64.nxv8half(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi64_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8i64.nxv8float(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi64_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8i64.nxv8double(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi1_nxv8f16 = call <vscale x 8 x half> @llvm.vp.sitofp.nxv8i1.nxv8half(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi1_nxv8f32 = call <vscale x 8 x float> @llvm.vp.sitofp.nxv8i1.nxv8float(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+  %vp_nxv8fi1_nxv8f64 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8i1.nxv8double(<vscale x 8 x i1> undef, <vscale x 8 x i1> undef, i32 undef)
+
   %nxv16i8_nxv16f16 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
   %nxv16i8_nxv16f32 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
   %nxv16i8_nxv16f64 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
@@ -4297,6 +8496,22 @@ define void @uitofp() {
   %nxv16i1_nxv16f32 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
   %nxv16i1_nxv16f64 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
 
+  %vp_nxv16fi8_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16i8.nxv16half(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi8_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16i8.nxv16float(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi8_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16i8.nxv16double(<vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi16_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16i16.nxv16half(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16i16.nxv16float(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16i16.nxv16double(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi32_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16i32.nxv16half(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi32_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16i32.nxv16float(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16i32.nxv16double(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi64_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16i64.nxv16half(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi64_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16i64.nxv16float(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi64_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16i64.nxv16double(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi1_nxv16f16 = call <vscale x 16 x half> @llvm.vp.sitofp.nxv16i1.nxv16half(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi1_nxv16f32 = call <vscale x 16 x float> @llvm.vp.sitofp.nxv16i1.nxv16float(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+  %vp_nxv16fi1_nxv16f64 = call <vscale x 16 x double> @llvm.vp.sitofp.nxv16i1.nxv16double(<vscale x 16 x i1> undef, <vscale x 16 x i1> undef, i32 undef)
+
   %nxv32i8_nxv32f16 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
   %nxv32i8_nxv32f32 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
   %nxv32i8_nxv32f64 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
@@ -4313,6 +8528,22 @@ define void @uitofp() {
   %nxv32i1_nxv32f32 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
   %nxv32i1_nxv32f64 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
 
+  %vp_nxv32fi8_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32i8.nxv32half(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi8_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32i8.nxv32float(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi8_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32i8.nxv32double(<vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi16_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32i16.nxv32half(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32i16.nxv32float(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32i16.nxv32double(<vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi32_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32i32.nxv32half(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi32_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32i32.nxv32float(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32i32.nxv32double(<vscale x 32 x i32> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi64_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32i64.nxv32half(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi64_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32i64.nxv32float(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi64_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32i64.nxv32double(<vscale x 32 x i64> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi1_nxv32f16 = call <vscale x 32 x half> @llvm.vp.sitofp.nxv32i1.nxv32half(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi1_nxv32f32 = call <vscale x 32 x float> @llvm.vp.sitofp.nxv32i1.nxv32float(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+  %vp_nxv32fi1_nxv32f64 = call <vscale x 32 x double> @llvm.vp.sitofp.nxv32i1.nxv32double(<vscale x 32 x i1> undef, <vscale x 32 x i1> undef, i32 undef)
+
   %nxv64i8_nxv64f16 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
   %nxv64i8_nxv64f32 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
   %nxv64i8_nxv64f64 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
@@ -4329,6 +8560,22 @@ define void @uitofp() {
   %nxv64i1_nxv64f32 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
   %nxv64i1_nxv64f64 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
 
+  %vp_nxv64fi8_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64i8.nxv64half(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi8_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64i8.nxv64float(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi8_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64i8.nxv64double(<vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi16_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64i16.nxv64half(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64i16.nxv64float(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64i16.nxv64double(<vscale x 64 x i16> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi32_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64i32.nxv64half(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi32_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64i32.nxv64float(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64i32.nxv64double(<vscale x 64 x i32> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi64_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64i64.nxv64half(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi64_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64i64.nxv64float(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi64_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64i64.nxv64double(<vscale x 64 x i64> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi1_nxv64f16 = call <vscale x 64 x half> @llvm.vp.sitofp.nxv64i1.nxv64half(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi1_nxv64f32 = call <vscale x 64 x float> @llvm.vp.sitofp.nxv64i1.nxv64float(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+  %vp_nxv64fi1_nxv64f64 = call <vscale x 64 x double> @llvm.vp.sitofp.nxv64i1.nxv64double(<vscale x 64 x i1> undef, <vscale x 64 x i1> undef, i32 undef)
+
   ret void
 }
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-cast.ll b/llvm/test/Analysis/CostModel/RISCV/vp-cast.ll
deleted file mode 100644
index ecd60050d564fc..00000000000000
--- a/llvm/test/Analysis/CostModel/RISCV/vp-cast.ll
+++ /dev/null
@@ -1,1004 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv32 -mattr=+v | FileCheck %s --check-prefixes=RV32
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv32 -mattr=+v --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=RV32_TYPEBASED
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=RV64
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=RV64_TYPEBASED
-
-define void @int_truncate() {
-; RV32-LABEL: 'int_truncate'
-; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV32_TYPEBASED-LABEL: 'int_truncate'
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64-LABEL: 'int_truncate'
-; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64_TYPEBASED-LABEL: 'int_truncate'
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %1 = trunc i32 undef to i8
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <1 x i32> undef to <1 x i8>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = trunc <2 x i32> undef to <2 x i8>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = trunc <4 x i32> undef to <4 x i8>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = trunc <8 x i32> undef to <8 x i8>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = trunc <16 x i32> undef to <16 x i8>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <1 x i8> @llvm.vp.trunc.v1i8.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i8> @llvm.vp.trunc.v2i8.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i8> @llvm.vp.trunc.v4i8.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i8> @llvm.vp.trunc.v8i8.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %15 = call <16 x i8> @llvm.vp.trunc.v16i8.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %16 = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i8.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %19 = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  trunc i32 undef to i8
-  trunc <1 x i32> undef to <1 x i8>
-  trunc <2 x i32> undef to <2 x i8>
-  trunc <4 x i32> undef to <4 x i8>
-  trunc <8 x i32> undef to <8 x i8>
-  trunc <16 x i32> undef to <16 x i8>
-  trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
-  trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
-  trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-  trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-  call <1 x i8> @llvm.vp.trunc.v1i32.v1i8(<1 x i32> undef, <1 x i1> undef, i32 undef)
-  call <2 x i8> @llvm.vp.trunc.v2i32.v2i8(<2 x i32> undef, <2 x i1> undef, i32 undef)
-  call <4 x i8> @llvm.vp.trunc.v4i32.v4i8(<4 x i32> undef, <4 x i1> undef, i32 undef)
-  call <8 x i8> @llvm.vp.trunc.v8i32.v8i8(<8 x i32> undef, <8 x i1> undef, i32 undef)
-  call <16 x i8> @llvm.vp.trunc.v16.v16(<16 x i32> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i32.nxv1i8(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i32.nxv2i8(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i8> @llvm.vp.trunc.nxv4i32.nxv4i8(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i32.nxv8i8(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @int_zext() {
-; RV32-LABEL: 'int_zext'
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV32_TYPEBASED-LABEL: 'int_zext'
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64-LABEL: 'int_zext'
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64_TYPEBASED-LABEL: 'int_zext'
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = zext i32 undef to i64
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <1 x i32> undef to <1 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = zext <2 x i32> undef to <2 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = zext <4 x i32> undef to <4 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = zext <8 x i32> undef to <8 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = zext <16 x i32> undef to <16 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.zext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.zext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.zext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.zext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.zext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.zext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  zext i32 undef to i64
-  zext <1 x i32> undef to <1 x i64>
-  zext <2 x i32> undef to <2 x i64>
-  zext <4 x i32> undef to <4 x i64>
-  zext <8 x i32> undef to <8 x i64>
-  zext <16 x i32> undef to <16 x i64>
-  zext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-  zext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-  zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-  zext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-  call <1 x i64> @llvm.vp.zext.v1i32.v1i64(<1 x i32> undef, <1 x i1> undef, i32 undef)
-  call <2 x i64> @llvm.vp.zext.v2i32.v2i64(<2 x i32> undef, <2 x i1> undef, i32 undef)
-  call <4 x i64> @llvm.vp.zext.v4i32.v4i64(<4 x i32> undef, <4 x i1> undef, i32 undef)
-  call <8 x i64> @llvm.vp.zext.v8i32.v8i64(<8 x i32> undef, <8 x i1> undef, i32 undef)
-  call <16 x i64> @llvm.vp.zext.v16.v16(<16 x i32> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i64> @llvm.vp.zext.nxv1i32.nxv1i64(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i64> @llvm.vp.zext.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i64> @llvm.vp.zext.nxv4i32.nxv4i64(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @int_sext() {
-; RV32-LABEL: 'int_sext'
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV32_TYPEBASED-LABEL: 'int_sext'
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64-LABEL: 'int_sext'
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64_TYPEBASED-LABEL: 'int_sext'
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sext i32 undef to i64
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <1 x i32> undef to <1 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sext <2 x i32> undef to <2 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = sext <4 x i32> undef to <4 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = sext <8 x i32> undef to <8 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = sext <16 x i32> undef to <16 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i64> @llvm.vp.sext.v1i64.v1i32(<1 x i32> undef, <1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.vp.sext.v2i64.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x i64> @llvm.vp.sext.v16i64.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i64> @llvm.vp.sext.nxv2i64.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x i64> @llvm.vp.sext.nxv4i64.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  sext i32 undef to i64
-  sext <1 x i32> undef to <1 x i64>
-  sext <2 x i32> undef to <2 x i64>
-  sext <4 x i32> undef to <4 x i64>
-  sext <8 x i32> undef to <8 x i64>
-  sext <16 x i32> undef to <16 x i64>
-  sext <vscale x 1 x i32> undef to <vscale x 1 x i64>
-  sext <vscale x 2 x i32> undef to <vscale x 2 x i64>
-  sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-  sext <vscale x 8 x i32> undef to <vscale x 8 x i64>
-  call <1 x i64> @llvm.vp.sext.v1i32.v1i64(<1 x i32> undef, <1 x i1> undef, i32 undef)
-  call <2 x i64> @llvm.vp.sext.v2i32.v2i64(<2 x i32> undef, <2 x i1> undef, i32 undef)
-  call <4 x i64> @llvm.vp.sext.v4i32.v4i64(<4 x i32> undef, <4 x i1> undef, i32 undef)
-  call <8 x i64> @llvm.vp.sext.v8i32.v8i64(<8 x i32> undef, <8 x i1> undef, i32 undef)
-  call <16 x i64> @llvm.vp.sext.v16.v16(<16 x i32> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i64> @llvm.vp.sext.nxv1i32.nxv1i64(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i64> @llvm.vp.sext.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i64> @llvm.vp.sext.nxv4i32.nxv4i64(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @fp_truncate() {
-; RV32-LABEL: 'fp_truncate'
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV32_TYPEBASED-LABEL: 'fp_truncate'
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64-LABEL: 'fp_truncate'
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64_TYPEBASED-LABEL: 'fp_truncate'
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptrunc double undef to float
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptrunc <1 x double> undef to <1 x float>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptrunc <2 x double> undef to <2 x float>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptrunc <4 x double> undef to <4 x float>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = fptrunc <8 x double> undef to <8 x float>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %6 = fptrunc <16 x double> undef to <16 x float>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x float> @llvm.vp.fptrunc.v1f32.v1f64(<1 x double> undef, <1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x float> @llvm.vp.fptrunc.v2f32.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %15 = call <16 x float> @llvm.vp.fptrunc.v16f32.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1f32.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8f32.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  fptrunc double undef to float
-  fptrunc <1 x double> undef to <1 x float>
-  fptrunc <2 x double> undef to <2 x float>
-  fptrunc <4 x double> undef to <4 x float>
-  fptrunc <8 x double> undef to <8 x float>
-  fptrunc <16 x double> undef to <16 x float>
-  fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
-  fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-  fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-  fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
-  call <1 x float> @llvm.vp.fptrunc.v1double.v1float(<1 x double> undef, <1 x i1> undef, i32 undef)
-  call <2 x float> @llvm.vp.fptrunc.v2double.v2float(<2 x double> undef, <2 x i1> undef, i32 undef)
-  call <4 x float> @llvm.vp.fptrunc.v4double.v4float(<4 x double> undef, <4 x i1> undef, i32 undef)
-  call <8 x float> @llvm.vp.fptrunc.v8double.v8float(<8 x double> undef, <8 x i1> undef, i32 undef)
-  call <16 x float> @llvm.vp.fptrunc.v16.v16(<16 x double> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x float> @llvm.vp.fptrunc.nxv1double.nxv1float(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2double.nxv2float(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x float> @llvm.vp.fptrunc.nxv4double.nxv4float(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x float> @llvm.vp.fptrunc.nxv8double.nxv8float(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @fpext() {
-; RV32-LABEL: 'fpext'
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV32_TYPEBASED-LABEL: 'fpext'
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64-LABEL: 'fpext'
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64_TYPEBASED-LABEL: 'fpext'
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fpext float undef to double
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fpext <1 x float> undef to <1 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fpext <2 x float> undef to <2 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = fpext <4 x float> undef to <4 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = fpext <8 x float> undef to <8 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = fpext <16 x float> undef to <16 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.fpext.v1f64.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  fpext float undef to double
-  fpext <1 x float> undef to <1 x double>
-  fpext <2 x float> undef to <2 x double>
-  fpext <4 x float> undef to <4 x double>
-  fpext <8 x float> undef to <8 x double>
-  fpext <16 x float> undef to <16 x double>
-  fpext <vscale x 1 x float> undef to <vscale x 1 x double>
-  fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-  fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-  fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-  call <1 x double> @llvm.vp.fpext.v1float.v1double(<1 x float> undef, <1 x i1> undef, i32 undef)
-  call <2 x double> @llvm.vp.fpext.v2float.v2double(<2 x float> undef, <2 x i1> undef, i32 undef)
-  call <4 x double> @llvm.vp.fpext.v4float.v4double(<4 x float> undef, <4 x i1> undef, i32 undef)
-  call <8 x double> @llvm.vp.fpext.v8float.v8double(<8 x float> undef, <8 x i1> undef, i32 undef)
-  call <16 x double> @llvm.vp.fpext.v16.v16(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.fpext.nxv1float.nxv1double(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.fpext.nxv2float.nxv2double(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.fpext.nxv4float.nxv4double(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.fpext.nxv8float.nxv8double(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @fptoui() {
-; RV32-LABEL: 'fptoui'
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV32_TYPEBASED-LABEL: 'fptoui'
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64-LABEL: 'fptoui'
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64_TYPEBASED-LABEL: 'fptoui'
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptoui float undef to i32
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptoui <1 x float> undef to <1 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptoui <2 x float> undef to <2 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptoui <4 x float> undef to <4 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptoui <8 x float> undef to <8 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptoui <16 x float> undef to <16 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptoui.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptoui.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptoui.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  fptoui float undef to i32
-  fptoui <1 x float> undef to <1 x i32>
-  fptoui <2 x float> undef to <2 x i32>
-  fptoui <4 x float> undef to <4 x i32>
-  fptoui <8 x float> undef to <8 x i32>
-  fptoui <16 x float> undef to <16 x i32>
-  fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
-  fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
-  fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
-  fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
-  call <1 x i32> @llvm.vp.fptoui.v1float.v1i32(<1 x float> undef, <1 x i1> undef, i32 undef)
-  call <2 x i32> @llvm.vp.fptoui.v2float.v2i32(<2 x float> undef, <2 x i1> undef, i32 undef)
-  call <4 x i32> @llvm.vp.fptoui.v4float.v4i32(<4 x float> undef, <4 x i1> undef, i32 undef)
-  call <8 x i32> @llvm.vp.fptoui.v8float.v8i32(<8 x float> undef, <8 x i1> undef, i32 undef)
-  call <16 x i32> @llvm.vp.fptoui.v16.v16(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i32> @llvm.vp.fptoui.nxv1float.nxv1i32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i32> @llvm.vp.fptoui.nxv2float.nxv2i32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i32> @llvm.vp.fptoui.nxv4float.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i32> @llvm.vp.fptoui.nxv8float.nxv8i32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @fptosi() {
-; RV32-LABEL: 'fptosi'
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV32_TYPEBASED-LABEL: 'fptosi'
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64-LABEL: 'fptosi'
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64_TYPEBASED-LABEL: 'fptosi'
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = fptosi float undef to i32
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = fptosi <1 x float> undef to <1 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = fptosi <2 x float> undef to <2 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = fptosi <4 x float> undef to <4 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = fptosi <8 x float> undef to <8 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = fptosi <16 x float> undef to <16 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x i32> @llvm.vp.fptosi.v1i32.v1f32(<1 x float> undef, <1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i32> @llvm.vp.fptosi.v2i32.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i32> @llvm.vp.fptosi.v16i32.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1i32.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2i32.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4i32.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8i32.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  fptosi float undef to i32
-  fptosi <1 x float> undef to <1 x i32>
-  fptosi <2 x float> undef to <2 x i32>
-  fptosi <4 x float> undef to <4 x i32>
-  fptosi <8 x float> undef to <8 x i32>
-  fptosi <16 x float> undef to <16 x i32>
-  fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
-  fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
-  fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
-  fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
-  call <1 x i32> @llvm.vp.fptosi.v1float.v1i32(<1 x float> undef, <1 x i1> undef, i32 undef)
-  call <2 x i32> @llvm.vp.fptosi.v2float.v2i32(<2 x float> undef, <2 x i1> undef, i32 undef)
-  call <4 x i32> @llvm.vp.fptosi.v4float.v4i32(<4 x float> undef, <4 x i1> undef, i32 undef)
-  call <8 x i32> @llvm.vp.fptosi.v8float.v8i32(<8 x float> undef, <8 x i1> undef, i32 undef)
-  call <16 x i32> @llvm.vp.fptosi.v16.v16(<16 x float> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x i32> @llvm.vp.fptosi.nxv1float.nxv1i32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x i32> @llvm.vp.fptosi.nxv2float.nxv2i32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x i32> @llvm.vp.fptosi.nxv4float.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x i32> @llvm.vp.fptosi.nxv8float.nxv8i32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @uitofp() {
-; RV32-LABEL: 'uitofp'
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV32_TYPEBASED-LABEL: 'uitofp'
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64-LABEL: 'uitofp'
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64_TYPEBASED-LABEL: 'uitofp'
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = uitofp i64 undef to double
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = uitofp <1 x i64> undef to <1 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = uitofp <2 x i64> undef to <2 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = uitofp <4 x i64> undef to <4 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = uitofp <8 x i64> undef to <8 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = uitofp <16 x i64> undef to <16 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.uitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.uitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.uitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.uitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.uitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.uitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.uitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.uitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  uitofp i64 undef to double
-  uitofp <1 x i64> undef to <1 x double>
-  uitofp <2 x i64> undef to <2 x double>
-  uitofp <4 x i64> undef to <4 x double>
-  uitofp <8 x i64> undef to <8 x double>
-  uitofp <16 x i64> undef to <16 x double>
-  uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-  uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-  uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-  uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-  call <1 x double> @llvm.vp.uitofp.v1i64.v1double(<1 x i64> undef, <1 x i1> undef, i32 undef)
-  call <2 x double> @llvm.vp.uitofp.v2i64.v2double(<2 x i64> undef, <2 x i1> undef, i32 undef)
-  call <4 x double> @llvm.vp.uitofp.v4i64.v4double(<4 x i64> undef, <4 x i1> undef, i32 undef)
-  call <8 x double> @llvm.vp.uitofp.v8i64.v8double(<8 x i64> undef, <8 x i1> undef, i32 undef)
-  call <16 x double> @llvm.vp.uitofp.v16.v16(<16 x i64> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.uitofp.nxv1i64.nxv1double(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.uitofp.nxv2i64.nxv2double(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.uitofp.nxv4i64.nxv4double(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.uitofp.nxv8i64.nxv8double(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}
-
-define void @sitofp() {
-; RV32-LABEL: 'sitofp'
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV32_TYPEBASED-LABEL: 'sitofp'
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV32_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64-LABEL: 'sitofp'
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; RV64_TYPEBASED-LABEL: 'sitofp'
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = sitofp i64 undef to double
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = sitofp <1 x i64> undef to <1 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = sitofp <2 x i64> undef to <2 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = sitofp <4 x i64> undef to <4 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = sitofp <8 x i64> undef to <8 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = sitofp <16 x i64> undef to <16 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <1 x double> @llvm.vp.sitofp.v1f64.v1i64(<1 x i64> undef, <1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.vp.sitofp.v2f64.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.vp.sitofp.v8f64.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.vp.sitofp.v16f64.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.vp.sitofp.nxv1f64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.vp.sitofp.nxv2f64.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.vp.sitofp.nxv4f64.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.vp.sitofp.nxv8f64.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-; RV64_TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-  sitofp i64 undef to double
-  sitofp <1 x i64> undef to <1 x double>
-  sitofp <2 x i64> undef to <2 x double>
-  sitofp <4 x i64> undef to <4 x double>
-  sitofp <8 x i64> undef to <8 x double>
-  sitofp <16 x i64> undef to <16 x double>
-  sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-  sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-  sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-  sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-  call <1 x double> @llvm.vp.sitofp.v1i64.v1double(<1 x i64> undef, <1 x i1> undef, i32 undef)
-  call <2 x double> @llvm.vp.sitofp.v2i64.v2double(<2 x i64> undef, <2 x i1> undef, i32 undef)
-  call <4 x double> @llvm.vp.sitofp.v4i64.v4double(<4 x i64> undef, <4 x i1> undef, i32 undef)
-  call <8 x double> @llvm.vp.sitofp.v8i64.v8double(<8 x i64> undef, <8 x i1> undef, i32 undef)
-  call <16 x double> @llvm.vp.sitofp.v16.v16(<16 x i64> undef, <16 x i1> undef, i32 undef)
-  call <vscale x 1 x double> @llvm.vp.sitofp.nxv1i64.nxv1double(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
-  call <vscale x 2 x double> @llvm.vp.sitofp.nxv2i64.nxv2double(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
-  call <vscale x 4 x double> @llvm.vp.sitofp.nxv4i64.nxv4double(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
-  call <vscale x 8 x double> @llvm.vp.sitofp.nxv8i64.nxv8double(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
-  ret void
-}