[llvm] [RISCV][TTI] Scale the cost of trunc/fptrunc/fpext with LMUL (PR #87101)

Shih-Po Hung via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 29 11:29:36 PDT 2024


https://github.com/arcbbb created https://github.com/llvm/llvm-project/pull/87101

Use the destination data type to measure the LMUL size for latency/throughput cost

>From 5d48de2bdc48b24dfa84dddca76af9d1df2683da Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Fri, 29 Mar 2024 02:22:18 -0700
Subject: [PATCH] [RISCV][TTI] Scale the cost of trunc/fptrunc/fpext with LMUL

Use the destination data type to measure the LMUL size for
latency/throughput cost
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  24 +-
 llvm/test/Analysis/CostModel/RISCV/cast.ll    | 454 +++++++++---------
 .../CostModel/RISCV/reduce-scalable-fp.ll     |  12 +-
 .../CostModel/RISCV/rvv-insertelement.ll      |  84 ++--
 .../CostModel/RISCV/shuffle-broadcast.ll      |   2 +-
 5 files changed, 297 insertions(+), 279 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 38cdf3c47c6420..0889ab7fb4d8db 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -909,6 +909,7 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
   if (!IsTypeLegal)
     return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
 
+  std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
   std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -943,13 +944,30 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       // Instead we use the following instructions to truncate to mask vector:
       // vand.vi v8, v8, 1
       // vmsne.vi v0, v8, 0
-      return 2;
+      return getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
+                                     SrcLT.second, CostKind);
     }
     [[fallthrough]];
   case ISD::FP_EXTEND:
-  case ISD::FP_ROUND:
+  case ISD::FP_ROUND: {
     // Counts of narrow/widen instructions.
-    return std::abs(PowDiff);
+    unsigned SrcEltSize = Src->getScalarSizeInBits();
+    unsigned DstEltSize = Dst->getScalarSizeInBits();
+
+    InstructionCost Cost = 0;
+    for (; SrcEltSize != DstEltSize;) {
+      MVT ElementMVT = (ISD == ISD::TRUNCATE)
+                           ? MVT::getIntegerVT(DstEltSize)
+                           : MVT::getFloatingPointVT(DstEltSize);
+      MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
+      unsigned Op = (ISD == ISD::TRUNCATE)    ? RISCV::VNSRL_WI
+                    : (ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
+                                              : RISCV::VFNCVT_F_F_W;
+      DstEltSize = ISD == ISD::FP_EXTEND ? DstEltSize >> 1 : DstEltSize << 1;
+      Cost += getRISCVInstructionCost(Op, DstMVT, CostKind);
+    }
+    return Cost;
+  }
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::SINT_TO_FP:
diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll
index 14da9a3f79d771..6ddd57a24c51f5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/cast.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll
@@ -1035,17 +1035,17 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i1 = trunc <4 x i8> undef to <4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i1 = trunc <4 x i16> undef to <4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i1 = trunc <4 x i32> undef to <4 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i8 = trunc <8 x i16> undef to <8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8i8 = trunc <8 x i32> undef to <8 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i16 = trunc <8 x i32> undef to <8 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i16 = trunc <8 x i64> undef to <8 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_v8i32 = trunc <8 x i64> undef to <8 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_v8i16 = trunc <8 x i64> undef to <8 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i32 = trunc <8 x i64> undef to <8 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i1 = trunc <8 x i8> undef to <8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i1 = trunc <8 x i16> undef to <8 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8i1 = trunc <8 x i32> undef to <8 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i1 = trunc <8 x i32> undef to <8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i8 = trunc <2 x i16> undef to <2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i8 = trunc <2 x i32> undef to <2 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i64_v16i8 = trunc <2 x i64> undef to <2 x i8>
@@ -1057,44 +1057,44 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i1 = trunc <2 x i32> undef to <2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16i1 = trunc <2 x i64> undef to <2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i8 = trunc <16 x i16> undef to <16 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32i16 = trunc <16 x i32> undef to <16 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i64_v32i16 = trunc <16 x i64> undef to <16 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i64_v32i32 = trunc <16 x i64> undef to <16 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i16 = trunc <16 x i32> undef to <16 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v32i64_v32i16 = trunc <16 x i64> undef to <16 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i64_v32i32 = trunc <16 x i64> undef to <16 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32i1 = trunc <16 x i8> undef to <16 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i16_v32i1 = trunc <16 x i16> undef to <16 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64i16 = trunc <64 x i32> undef to <64 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64i16 = trunc <64 x i64> undef to <64 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v64i64_v64i32 = trunc <64 x i64> undef to <64 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_v64i1 = trunc <64 x i8> undef to <64 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i16_v32i1 = trunc <16 x i16> undef to <16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i32_v64i16 = trunc <64 x i32> undef to <64 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64i16 = trunc <64 x i64> undef to <64 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64i64_v64i32 = trunc <64 x i64> undef to <64 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i1 = trunc <64 x i8> undef to <64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128i16 = trunc <128 x i32> undef to <128 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128i16 = trunc <128 x i64> undef to <128 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v128i64_v128i32 = trunc <128 x i64> undef to <128 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i8_v128i1 = trunc <128 x i8> undef to <128 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128i32_v128i16 = trunc <128 x i32> undef to <128 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128i16 = trunc <128 x i64> undef to <128 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128i64_v128i32 = trunc <128 x i64> undef to <128 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v128i8_v128i1 = trunc <128 x i8> undef to <128 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v256i32_v256i16 = trunc <256 x i32> undef to <256 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v256i64_v256i16 = trunc <256 x i64> undef to <256 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v256i64_v256i32 = trunc <256 x i64> undef to <256 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v256i8_v256i1 = trunc <256 x i8> undef to <256 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v256i32_v256i16 = trunc <256 x i32> undef to <256 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v256i64_v256i16 = trunc <256 x i64> undef to <256 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v256i64_v256i32 = trunc <256 x i64> undef to <256 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v256i8_v256i1 = trunc <256 x i8> undef to <256 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1i8 = trunc <vscale x 1 x i16> undef to <vscale x 1 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1i32_nxv1i8 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
@@ -1115,56 +1115,56 @@ define void @trunc() {
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i1 = trunc <vscale x 2 x i8> undef to <vscale x 2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i64_nxv2i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i8 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4i8 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i64_nxv4i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64_nxv4i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64_nxv4i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i64_nxv4i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i1 = trunc <vscale x 4 x i8> undef to <vscale x 4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i64_nxv4i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i8 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i64_nxv8i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64_nxv8i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i32_nxv8i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8i64_nxv8i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8i64_nxv8i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i64_nxv8i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i1 = trunc <vscale x 8 x i8> undef to <vscale x 8 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_nxv8i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_nxv16i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16i64_nxv16i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16i16 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16i16 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i64_nxv16i32 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16i1 = trunc <vscale x 16 x i8> undef to <vscale x 16 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_nxv16i1 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_nxv16i1 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16i1 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32i8 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32i32_nxv32i8 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32i64_nxv32i8 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32i16 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32i16 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv32i64_nxv32i32 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8_nxv32i1 = trunc <vscale x 32 x i8> undef to <vscale x 32 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i16_nxv32i1 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32i32_nxv32i1 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32i1 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i16_nxv64i8 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64i32_nxv64i8 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i64_nxv8i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_nxv16i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv16i32_nxv16i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv16i64_nxv16i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16i32_nxv16i16 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16i64_nxv16i16 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i64_nxv16i32 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i1 = trunc <vscale x 16 x i8> undef to <vscale x 16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i1 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i32_nxv16i1 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i64_nxv16i1 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32i16_nxv32i8 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32i32_nxv32i8 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv32i64_nxv32i8 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32i32_nxv32i16 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32i64_nxv32i16 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32i64_nxv32i32 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i1 = trunc <vscale x 32 x i8> undef to <vscale x 32 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv32i16_nxv32i1 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i32_nxv32i1 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i64_nxv32i1 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv64i16_nxv64i8 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i8>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv64i32_nxv64i8 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i8>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i64_nxv64i8 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i8>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64i16 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %nxv64i64_nxv64i16 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i16>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv64i64_nxv64i32 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i32>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64i8_nxv64i1 = trunc <vscale x 64 x i8> undef to <vscale x 64 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv64i16_nxv64i1 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i1>
-; RV32-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64i32_nxv64i1 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64i32_nxv64i16 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %nxv64i64_nxv64i16 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i16>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %nxv64i64_nxv64i32 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i32>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv64i8_nxv64i1 = trunc <vscale x 64 x i8> undef to <vscale x 64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i16_nxv64i1 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i1>
+; RV32-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i32_nxv64i1 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i1>
 ; RV32-NEXT:  Cost Model: Invalid cost for instruction: %nxv64i64_nxv64i1 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i1>
 ; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@@ -1188,17 +1188,17 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i1 = trunc <4 x i8> undef to <4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i1 = trunc <4 x i16> undef to <4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i1 = trunc <4 x i32> undef to <4 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i8 = trunc <8 x i16> undef to <8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8i8 = trunc <8 x i32> undef to <8 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i16 = trunc <8 x i32> undef to <8 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i16 = trunc <8 x i64> undef to <8 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_v8i32 = trunc <8 x i64> undef to <8 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i64_v8i16 = trunc <8 x i64> undef to <8 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i32 = trunc <8 x i64> undef to <8 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i1 = trunc <8 x i8> undef to <8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i1 = trunc <8 x i16> undef to <8 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8i1 = trunc <8 x i32> undef to <8 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i1 = trunc <8 x i32> undef to <8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i8 = trunc <2 x i16> undef to <2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i8 = trunc <2 x i32> undef to <2 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16i64_v16i8 = trunc <2 x i64> undef to <2 x i8>
@@ -1210,43 +1210,43 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i1 = trunc <2 x i32> undef to <2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16i1 = trunc <2 x i64> undef to <2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i8 = trunc <16 x i16> undef to <16 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32i16 = trunc <16 x i32> undef to <16 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i64_v32i16 = trunc <16 x i64> undef to <16 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i64_v32i32 = trunc <16 x i64> undef to <16 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i16 = trunc <16 x i32> undef to <16 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v32i64_v32i16 = trunc <16 x i64> undef to <16 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i64_v32i32 = trunc <16 x i64> undef to <16 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32i1 = trunc <16 x i8> undef to <16 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i16_v32i1 = trunc <16 x i16> undef to <16 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64i16 = trunc <64 x i32> undef to <64 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64i16 = trunc <64 x i64> undef to <64 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v64i64_v64i32 = trunc <64 x i64> undef to <64 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i8_v64i1 = trunc <64 x i8> undef to <64 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i16_v32i1 = trunc <16 x i16> undef to <16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64i32_v64i16 = trunc <64 x i32> undef to <64 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64i16 = trunc <64 x i64> undef to <64 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64i64_v64i32 = trunc <64 x i64> undef to <64 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i1 = trunc <64 x i8> undef to <64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128i16 = trunc <128 x i32> undef to <128 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128i16 = trunc <128 x i64> undef to <128 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v128i64_v128i32 = trunc <128 x i64> undef to <128 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i8_v128i1 = trunc <128 x i8> undef to <128 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128i32_v128i16 = trunc <128 x i32> undef to <128 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128i16 = trunc <128 x i64> undef to <128 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128i64_v128i32 = trunc <128 x i64> undef to <128 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v128i8_v128i1 = trunc <128 x i8> undef to <128 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v256i32_v256i16 = trunc <256 x i32> undef to <256 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v256i64_v256i16 = trunc <256 x i64> undef to <256 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v256i64_v256i32 = trunc <256 x i64> undef to <256 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v256i8_v256i1 = trunc <256 x i8> undef to <256 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v256i32_v256i16 = trunc <256 x i32> undef to <256 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v256i64_v256i16 = trunc <256 x i64> undef to <256 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v256i64_v256i32 = trunc <256 x i64> undef to <256 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v256i8_v256i1 = trunc <256 x i8> undef to <256 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1i8 = trunc <vscale x 1 x i16> undef to <vscale x 1 x i8>
@@ -1268,57 +1268,57 @@ define void @trunc() {
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i1 = trunc <vscale x 2 x i8> undef to <vscale x 2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv2i64_nxv2i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i8 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4i8 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i64_nxv4i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64_nxv4i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64_nxv4i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4i64_nxv4i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i1 = trunc <vscale x 4 x i8> undef to <vscale x 4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv4i64_nxv4i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i8 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i64_nxv8i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64_nxv8i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv8i32_nxv8i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8i64_nxv8i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8i64_nxv8i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i64_nxv8i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i1 = trunc <vscale x 8 x i8> undef to <vscale x 8 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_nxv8i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_nxv16i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16i64_nxv16i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16i16 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16i16 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16i64_nxv16i32 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16i1 = trunc <vscale x 16 x i8> undef to <vscale x 16 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_nxv16i1 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_nxv16i1 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16i1 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32i8 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32i32_nxv32i8 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv32i64_nxv32i8 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32i16 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32i16 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv32i64_nxv32i32 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8_nxv32i1 = trunc <vscale x 32 x i8> undef to <vscale x 32 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv32i16_nxv32i1 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv32i32_nxv32i1 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32i1 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv64i16_nxv64i8 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64i32_nxv64i8 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv64i64_nxv64i8 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i8>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64i16 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64i16 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i16>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv64i64_nxv64i32 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i32>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv64i8_nxv64i1 = trunc <vscale x 64 x i8> undef to <vscale x 64 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv64i16_nxv64i1 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv64i32_nxv64i1 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i1>
-; RV64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %nxv64i64_nxv64i1 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv8i64_nxv8i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_nxv16i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv16i32_nxv16i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %nxv16i64_nxv16i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16i32_nxv16i16 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16i64_nxv16i16 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i64_nxv16i32 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i1 = trunc <vscale x 16 x i8> undef to <vscale x 16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i1 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv16i32_nxv16i1 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv16i64_nxv16i1 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv32i16_nxv32i8 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32i32_nxv32i8 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %nxv32i64_nxv32i8 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32i32_nxv32i16 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32i64_nxv32i16 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32i64_nxv32i32 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i1 = trunc <vscale x 32 x i8> undef to <vscale x 32 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv32i16_nxv32i1 = trunc <vscale x 32 x i16> undef to <vscale x 32 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv32i32_nxv32i1 = trunc <vscale x 32 x i32> undef to <vscale x 32 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv32i64_nxv32i1 = trunc <vscale x 32 x i64> undef to <vscale x 32 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv64i16_nxv64i8 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv64i32_nxv64i8 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %nxv64i64_nxv64i8 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i8>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64i32_nxv64i16 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %nxv64i64_nxv64i16 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i16>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64i64_nxv64i32 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i32>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %nxv64i8_nxv64i1 = trunc <vscale x 64 x i8> undef to <vscale x 64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %nxv64i16_nxv64i1 = trunc <vscale x 64 x i16> undef to <vscale x 64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %nxv64i32_nxv64i1 = trunc <vscale x 64 x i32> undef to <vscale x 64 x i1>
+; RV64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %nxv64i64_nxv64i1 = trunc <vscale x 64 x i64> undef to <vscale x 64 x i1>
 ; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2i16_v2i8 = trunc <2 x i16> undef to <2 x i8>
@@ -1495,44 +1495,44 @@ define void @fpext() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2f64 = fpext <2 x half> undef to <2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2f64 = fpext <2 x float> undef to <2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4f32 = fpext <4 x half> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4f64 = fpext <4 x half> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4f64 = fpext <4 x float> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8f32 = fpext <8 x half> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8f64 = fpext <8 x half> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8f64 = fpext <8 x float> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16f32 = fpext <16 x half> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16f64 = fpext <16 x half> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16f64 = fpext <16 x float> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32f32 = fpext <32 x half> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32f64 = fpext <32 x half> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32f64 = fpext <32 x float> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64f32 = fpext <64 x half> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64f64 = fpext <64 x half> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v64f32_v64f64 = fpext <64 x float> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128f32 = fpext <128 x half> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128f64 = fpext <128 x half> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v128f32_v128f64 = fpext <128 x float> undef to <128 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4f64 = fpext <4 x half> undef to <4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4f64 = fpext <4 x float> undef to <4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8f32 = fpext <8 x half> undef to <8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f16_v8f64 = fpext <8 x half> undef to <8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8f64 = fpext <8 x float> undef to <8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16_v16f32 = fpext <16 x half> undef to <16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f16_v16f64 = fpext <16 x half> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16f64 = fpext <16 x float> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32f16_v32f32 = fpext <32 x half> undef to <32 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32f16_v32f64 = fpext <32 x half> undef to <32 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32f64 = fpext <32 x float> undef to <32 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64f16_v64f32 = fpext <64 x half> undef to <64 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64f16_v64f64 = fpext <64 x half> undef to <64 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64f32_v64f64 = fpext <64 x float> undef to <64 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v128f16_v128f32 = fpext <128 x half> undef to <128 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %v128f16_v128f64 = fpext <128 x half> undef to <128 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128f32_v128f64 = fpext <128 x float> undef to <128 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1f32 = fpext <vscale x 1 x half> undef to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1f64 = fpext <vscale x 1 x half> undef to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f64 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16f32 = fpext <vscale x 16 x half> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16f64 = fpext <vscale x 16 x half> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16f64 = fpext <vscale x 16 x float> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32f32 = fpext <vscale x 32 x half> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32f64 = fpext <vscale x 32 x half> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv32f32_nxv32f64 = fpext <vscale x 32 x float> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64f32 = fpext <vscale x 64 x half> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %nxv64f16_nxv64f64 = fpext <vscale x 64 x half> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv64f32_nxv64f64 = fpext <vscale x 64 x float> undef to <vscale x 64 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4f16_nxv4f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f32_nxv4f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f16_nxv8f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8f16_nxv8f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f32_nxv8f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16f16_nxv16f32 = fpext <vscale x 16 x half> undef to <vscale x 16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16f16_nxv16f64 = fpext <vscale x 16 x half> undef to <vscale x 16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16f32_nxv16f64 = fpext <vscale x 16 x float> undef to <vscale x 16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32f16_nxv32f32 = fpext <vscale x 32 x half> undef to <vscale x 32 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32f16_nxv32f64 = fpext <vscale x 32 x half> undef to <vscale x 32 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32f32_nxv32f64 = fpext <vscale x 32 x float> undef to <vscale x 32 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv64f16_nxv64f32 = fpext <vscale x 64 x half> undef to <vscale x 64 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %nxv64f16_nxv64f64 = fpext <vscale x 64 x half> undef to <vscale x 64 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64f32_nxv64f64 = fpext <vscale x 64 x float> undef to <vscale x 64 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f16_v2f32 = fpext <2 x half> undef to <2 x float>
@@ -1603,20 +1603,20 @@ define void @fptrunc() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4f16 = fptrunc <4 x double> undef to <4 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4f32 = fptrunc <4 x double> undef to <4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8f16 = fptrunc <8 x float> undef to <8 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8f16 = fptrunc <8 x double> undef to <8 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8f32 = fptrunc <8 x double> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16f16 = fptrunc <16 x float> undef to <16 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f64_v16f16 = fptrunc <16 x double> undef to <16 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16f32 = fptrunc <16 x double> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32f16 = fptrunc <32 x float> undef to <32 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v32f64_v32f16 = fptrunc <32 x double> undef to <32 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32f64_v32f32 = fptrunc <32 x double> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v64f32_v64f16 = fptrunc <64 x float> undef to <64 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v64f64_v64f16 = fptrunc <64 x double> undef to <64 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v64f64_v64f32 = fptrunc <64 x double> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v128f32_v128f16 = fptrunc <128 x float> undef to <128 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %v128f64_v128f16 = fptrunc <128 x double> undef to <128 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v128f64_v128f32 = fptrunc <128 x double> undef to <128 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8f16 = fptrunc <8 x double> undef to <8 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8f32 = fptrunc <8 x double> undef to <8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16f16 = fptrunc <16 x float> undef to <16 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16f16 = fptrunc <16 x double> undef to <16 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f64_v16f32 = fptrunc <16 x double> undef to <16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32f32_v32f16 = fptrunc <32 x float> undef to <32 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32f16 = fptrunc <32 x double> undef to <32 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32f64_v32f32 = fptrunc <32 x double> undef to <32 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64f32_v64f16 = fptrunc <64 x float> undef to <64 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64f64_v64f16 = fptrunc <64 x double> undef to <64 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64f64_v64f32 = fptrunc <64 x double> undef to <64 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128f32_v128f16 = fptrunc <128 x float> undef to <128 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128f64_v128f16 = fptrunc <128 x double> undef to <128 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128f64_v128f32 = fptrunc <128 x double> undef to <128 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f16 = fptrunc <vscale x 1 x float> undef to <vscale x 1 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1f16 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1f32 = fptrunc <vscale x 1 x double> undef to <vscale x 1 x float>
@@ -1624,20 +1624,20 @@ define void @fptrunc() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv1f16 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv1f32 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4f16 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4f16 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f64_nxv4f32 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8f16 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f64_nxv8f16 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv8f64_nxv8f32 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv16f32_nxv16f16 = fptrunc <vscale x 16 x float> undef to <vscale x 16 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv16f64_nxv16f16 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv16f64_nxv16f32 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv32f32_nxv32f16 = fptrunc <vscale x 32 x float> undef to <vscale x 32 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32f64_nxv32f16 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv32f64_nxv32f32 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv64f32_nxv64f16 = fptrunc <vscale x 64 x float> undef to <vscale x 64 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %nxv64f64_nxv64f16 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv64f64_nxv64f32 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4f16 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4f32 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8f16 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f64_nxv8f16 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f64_nxv8f32 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16f32_nxv16f16 = fptrunc <vscale x 16 x float> undef to <vscale x 16 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16f64_nxv16f16 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16f64_nxv16f32 = fptrunc <vscale x 16 x double> undef to <vscale x 16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32f32_nxv32f16 = fptrunc <vscale x 32 x float> undef to <vscale x 32 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32f64_nxv32f16 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32f64_nxv32f32 = fptrunc <vscale x 32 x double> undef to <vscale x 32 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64f32_nxv64f16 = fptrunc <vscale x 64 x float> undef to <vscale x 64 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %nxv64f64_nxv64f16 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64f64_nxv64f32 = fptrunc <vscale x 64 x double> undef to <vscale x 64 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f32_v2f16 = fptrunc <2 x float> undef to <2 x half>
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 0dd3e3cf53e1ca..955116021aee0b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -238,7 +238,7 @@ define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 
 define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
@@ -254,7 +254,7 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 
 define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
@@ -358,7 +358,7 @@ define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 
 define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv2f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
@@ -374,7 +374,7 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 
 define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
@@ -418,7 +418,7 @@ define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 
 define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
@@ -434,7 +434,7 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 
 define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll
index 6e1ae0216f7655..8b68480788f79e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll
@@ -12,12 +12,12 @@ define void @insertelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
@@ -66,12 +66,12 @@ define void @insertelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1
@@ -120,12 +120,12 @@ define void @insertelement_int(i32 %x) {
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
-; RV32V-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
+; RV32V-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x
 ; RV32V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x
@@ -177,12 +177,12 @@ define void @insertelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
@@ -231,12 +231,12 @@ define void @insertelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1
@@ -285,12 +285,12 @@ define void @insertelement_int(i32 %x) {
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
-; RV64V-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
+; RV64V-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x
 ; RV64V-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x
@@ -341,13 +341,13 @@ define void @insertelement_int(i32 %x) {
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
@@ -395,13 +395,13 @@ define void @insertelement_int(i32 %x) {
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1
@@ -449,13 +449,13 @@ define void @insertelement_int(i32 %x) {
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2i1_x = insertelement <2 x i1> undef, i1 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
-; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
+; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x
 ; RV32ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x
@@ -506,13 +506,13 @@ define void @insertelement_int(i32 %x) {
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 0
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 0
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 0
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_0 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 0
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32i1_0 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 0
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0
@@ -560,13 +560,13 @@ define void @insertelement_int(i32 %x) {
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement <vscale x 2 x i1> undef, i1 undef, i32 1
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement <vscale x 4 x i1> undef, i1 undef, i32 1
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement <vscale x 8 x i1> undef, i1 undef, i32 1
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %nxv16i1_1 = insertelement <vscale x 16 x i1> undef, i1 undef, i32 1
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32i1_1 = insertelement <vscale x 32 x i1> undef, i1 undef, i32 1
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1
@@ -614,13 +614,13 @@ define void @insertelement_int(i32 %x) {
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v2i1_x = insertelement <2 x i1> undef, i1 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement <vscale x 2 x i1> undef, i1 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement <vscale x 4 x i1> undef, i1 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement <vscale x 8 x i1> undef, i1 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
-; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %nxv16i1_x = insertelement <vscale x 16 x i1> undef, i1 undef, i32 %x
+; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %nxv32i1_x = insertelement <vscale x 32 x i1> undef, i1 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x
 ; RV64ZVE64X-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
index b763198e98bacd..79ba1562d0f884 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll
@@ -197,7 +197,7 @@ define void  @broadcast_fixed() #0{
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %41 = shufflevector <32 x i1> undef, <32 x i1> undef, <32 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %42 = shufflevector <64 x i1> undef, <64 x i1> undef, <64 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %43 = shufflevector <128 x i1> undef, <128 x i1> undef, <128 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %ins1 = insertelement <128 x i1> poison, i1 poison, i32 0
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %ins1 = insertelement <128 x i1> poison, i1 poison, i32 0
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %44 = shufflevector <128 x i1> %ins1, <128 x i1> poison, <128 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ins2 = insertelement <2 x i8> poison, i8 3, i32 0
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %45 = shufflevector <2 x i8> %ins2, <2 x i8> undef, <2 x i32> zeroinitializer



More information about the llvm-commits mailing list