[llvm] [RISCV][TTI] Clarify getRISCVInstructionCost usage and fix MVT for ISD::FP_EXTEND (PR #121556)

Fri Jan 3 01:43:50 PST 2025

https://github.com/arcbbb created https://github.com/llvm/llvm-project/pull/121556

This patch makes two changes:
1. Adds a comment to explicitly state that getRISCVInstructionCost uses vtype associated with widening and narrowing instructions.
For example, with vtype = (SEW):
For vfwcvt.f.f.v, the source is (SEW) and the destination is (2 * SEW).
For vfncvt.f.f.w, the source is (2 * SEW) and the destination is (SEW).
In these cases, the type passed to `getRISCVInstructionCost` differs:
- The source type is used for `vfwcvt.f.f.v`.
- The destination type is used for `vfncvt.f.f.w`.

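2. Fixes the MVT handling for ISD::FP_EXTEND: the vtype passed to `getRISCVInstructionCost` is now built from the source element size instead of the destination element size.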

>From 1b6178826b848cc2c997c281c448b760e39897ac Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Fri, 3 Jan 2025 01:20:43 -0800
Subject: [PATCH] [RISCV][TTI] Use vtype for getRISCVInstructionCost

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  15 +-
 .../Target/RISCV/RISCVTargetTransformInfo.h   |   5 +
 .../test/Analysis/CostModel/RISCV/arith-fp.ll |  88 ++++++------
 llvm/test/Analysis/CostModel/RISCV/cast.ll    | 136 +++++++++---------
 .../CostModel/RISCV/reduce-scalable-fp.ll     |  12 +-
 5 files changed, 132 insertions(+), 124 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 850d6244affa50..8484ff3696be48 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1301,13 +1301,16 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                             : RISCV::VFNCVT_F_F_W;
     InstructionCost Cost = 0;
     for (; SrcEltSize != DstEltSize;) {
-      MVT ElementMVT = (ISD == ISD::TRUNCATE)
-                           ? MVT::getIntegerVT(DstEltSize)
+      MVT ElementMVT = (ISD == ISD::TRUNCATE) ? MVT::getIntegerVT(DstEltSize)
+                       : (ISD == ISD::FP_EXTEND)
+                           ? MVT::getFloatingPointVT(SrcEltSize)
                            : MVT::getFloatingPointVT(DstEltSize);
-      MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
-      DstEltSize =
-          (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
-      Cost += getRISCVInstructionCost(Op, DstMVT, CostKind);
+      MVT VTypeMVT = DstLT.second.changeVectorElementType(ElementMVT);
+      Cost += getRISCVInstructionCost(Op, VTypeMVT, CostKind);
+      if (DstEltSize > SrcEltSize)
+        SrcEltSize = SrcEltSize << 1;
+      else
+        DstEltSize = DstEltSize << 1;
     }
     return Cost;
   }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 9b364391f0fa47..3a9073731681ef 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -48,6 +48,11 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   /// actual target hardware.
   unsigned getEstimatedVLFor(VectorType *Ty);
 
+  /// This function calculates the costs for one or more RVV opcodes based
+  /// on the vtype and the cost kind.
+  /// \param OpCodes A list of opcodes of the RVV instruction to evaluate.
+  /// \param VT The MVT of vtype associated with the RVV instructions.
+  /// \param CostKind The type of cost to compute.
   InstructionCost getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
                                           TTI::TargetCostKind CostKind);
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
index d85c9e2a19209c..58b8066a080997 100644
--- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
@@ -12,13 +12,13 @@ define void @fadd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fadd <1 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fadd <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fadd <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fadd <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fadd <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8BF16 = fadd <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16BF16 = fadd <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd <vscale x 1 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd <vscale x 2 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV4BF16 = fadd <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NXV8BF16 = fadd <vscale x 8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NXV16BF16 = fadd <vscale x 16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef
@@ -174,14 +174,14 @@ define void @fadd_f16() {
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fadd <1 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fadd <2 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fadd <4 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fadd <8 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fadd <16 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fadd <32 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8F16 = fadd <8 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16F16 = fadd <16 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32F16 = fadd <32 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fadd <vscale x 1 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fadd <vscale x 2 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fadd <vscale x 4 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fadd <vscale x 8 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fadd <vscale x 16 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV4F16 = fadd <vscale x 4 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NXV8F16 = fadd <vscale x 8 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NXV16F16 = fadd <vscale x 16 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd <vscale x 32 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fadd.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
@@ -234,13 +234,13 @@ define void @fsub() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fsub <1 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fsub <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fsub <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fsub <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fsub <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8BF16 = fsub <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16BF16 = fsub <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub <vscale x 1 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub <vscale x 2 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV4BF16 = fsub <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NXV8BF16 = fsub <vscale x 8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NXV16BF16 = fsub <vscale x 16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef
@@ -396,14 +396,14 @@ define void @fsub_f16() {
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fsub <1 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fsub <2 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fsub <4 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fsub <8 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fsub <16 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fsub <32 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8F16 = fsub <8 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16F16 = fsub <16 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32F16 = fsub <32 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fsub <vscale x 1 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fsub <vscale x 2 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fsub <vscale x 4 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fsub <vscale x 8 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fsub <vscale x 16 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV4F16 = fsub <vscale x 4 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NXV8F16 = fsub <vscale x 8 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NXV16F16 = fsub <vscale x 16 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub <vscale x 32 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fsub.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
@@ -456,13 +456,13 @@ define void @fmul() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fmul <1 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fmul <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fmul <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fmul <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fmul <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8BF16 = fmul <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16BF16 = fmul <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul <vscale x 1 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul <vscale x 2 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV4BF16 = fmul <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NXV8BF16 = fmul <vscale x 8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NXV16BF16 = fmul <vscale x 16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef
@@ -618,14 +618,14 @@ define void @fmul_f16() {
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fmul <1 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fmul <2 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fmul <4 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fmul <8 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fmul <16 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fmul <32 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8F16 = fmul <8 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16F16 = fmul <16 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32F16 = fmul <32 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fmul <vscale x 1 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fmul <vscale x 2 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fmul <vscale x 4 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fmul <vscale x 8 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fmul <vscale x 16 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV4F16 = fmul <vscale x 4 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NXV8F16 = fmul <vscale x 8 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NXV16F16 = fmul <vscale x 16 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul <vscale x 32 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fmul.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
@@ -678,13 +678,13 @@ define void @fdiv() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = fdiv <1 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2BF16 = fdiv <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4BF16 = fdiv <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8BF16 = fdiv <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16BF16 = fdiv <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8BF16 = fdiv <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16BF16 = fdiv <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv <vscale x 1 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv <vscale x 2 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV4BF16 = fdiv <vscale x 4 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NXV8BF16 = fdiv <vscale x 8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NXV16BF16 = fdiv <vscale x 16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef
@@ -840,14 +840,14 @@ define void @fdiv_f16() {
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fdiv <1 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fdiv <2 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fdiv <4 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fdiv <8 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fdiv <16 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fdiv <32 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8F16 = fdiv <8 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16F16 = fdiv <16 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32F16 = fdiv <32 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1F16_VP = call <1 x half> @llvm.vp.fdiv.v1f16(<1 x half> undef, <1 x half> undef, <1 x i1> undef, i32 undef)
 ; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F16_VP = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> undef, <2 x half> undef, <2 x i1> undef, i32 undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll
index 04048b8ba17f17..9b2f70a15fabcb 100644
--- a/llvm/test/Analysis/CostModel/RISCV/cast.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll
@@ -2988,41 +2988,41 @@ define void @fpext() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v2f16_v2f64 = call <2 x double> @llvm.vp.fpext.v2f64.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v2f32_v2f64 = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4f32 = fpext <4 x half> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4f64 = fpext <4 x half> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4f64 = fpext <4 x float> undef to <4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4f64 = fpext <4 x half> undef to <4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4f64 = fpext <4 x float> undef to <4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f16_v4f32 = call <4 x float> @llvm.vp.fpext.v4f32.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v4f16_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f32_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8f32 = fpext <8 x half> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8f16_v8f64 = fpext <8 x half> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8f64 = fpext <8 x float> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f16_v8f32 = call <8 x float> @llvm.vp.fpext.v8f32.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v8f16_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v8f32_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f16_v16f32 = fpext <16 x half> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16f16_v16f64 = fpext <16 x half> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16f64 = fpext <16 x float> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f16_v16f32 = call <16 x float> @llvm.vp.fpext.v16f32.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_v16f16_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v16f32_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v32f16_v32f32 = fpext <32 x half> undef to <32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v32f16_v32f64 = fpext <32 x half> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32f64 = fpext <32 x float> undef to <32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_v32f16_v32f32 = call <32 x float> @llvm.vp.fpext.v32f32.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_v32f16_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v32f32_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %v64f16_v64f32 = fpext <64 x half> undef to <64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %v64f16_v64f64 = fpext <64 x half> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v64f32_v64f64 = fpext <64 x float> undef to <64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_v64f16_v64f32 = call <64 x float> @llvm.vp.fpext.v64f32.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_v64f16_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v64f32_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v128f16_v128f32 = fpext <128 x half> undef to <128 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %v128f16_v128f64 = fpext <128 x half> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v128f32_v128f64 = fpext <128 x float> undef to <128 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_v128f16_v128f32 = call <128 x float> @llvm.vp.fpext.v128f32.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_v128f16_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_v128f32_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v4f16_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v4f32_v4f64 = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8f32 = fpext <8 x half> undef to <8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8f64 = fpext <8 x half> undef to <8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8f64 = fpext <8 x float> undef to <8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_v8f16_v8f32 = call <8 x float> @llvm.vp.fpext.v8f32.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_v8f16_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v8f32_v8f64 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16f32 = fpext <16 x half> undef to <16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16f16_v16f64 = fpext <16 x half> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16f32_v16f64 = fpext <16 x float> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_v16f16_v16f32 = call <16 x float> @llvm.vp.fpext.v16f32.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_v16f16_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v16f32_v16f64 = call <16 x double> @llvm.vp.fpext.v16f64.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32f16_v32f32 = fpext <32 x half> undef to <32 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %v32f16_v32f64 = fpext <32 x half> undef to <32 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v32f32_v32f64 = fpext <32 x float> undef to <32 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_v32f16_v32f32 = call <32 x float> @llvm.vp.fpext.v32f32.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_v32f16_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f16(<32 x half> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v32f32_v32f64 = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> undef, <32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v64f16_v64f32 = fpext <64 x half> undef to <64 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64f16_v64f64 = fpext <64 x half> undef to <64 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v64f32_v64f64 = fpext <64 x float> undef to <64 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_v64f16_v64f32 = call <64 x float> @llvm.vp.fpext.v64f32.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_v64f16_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f16(<64 x half> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v64f32_v64f64 = call <64 x double> @llvm.vp.fpext.v64f64.v64f32(<64 x float> undef, <64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v128f16_v128f32 = fpext <128 x half> undef to <128 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128f16_v128f64 = fpext <128 x half> undef to <128 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v128f32_v128f64 = fpext <128 x float> undef to <128 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_v128f16_v128f32 = call <128 x float> @llvm.vp.fpext.v128f32.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_v128f16_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f16(<128 x half> undef, <128 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_v128f32_v128f64 = call <128 x double> @llvm.vp.fpext.v128f64.v128f32(<128 x float> undef, <128 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1f32 = fpext <vscale x 1 x half> undef to <vscale x 1 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1f64 = fpext <vscale x 1 x half> undef to <vscale x 1 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f64 = fpext <vscale x 1 x float> undef to <vscale x 1 x double>
@@ -3030,41 +3030,41 @@ define void @fpext() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv1f16_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv1f32_nxv1f64 = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f16_nxv2f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv2f16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv4f16_nxv4f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv4f32_nxv4f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv4f16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv4f32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f16_nxv8f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %nxv8f16_nxv8f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv8f32_nxv8f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %vp_nxv8f16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv8f32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %nxv16f16_nxv16f32 = fpext <vscale x 16 x half> undef to <vscale x 16 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %nxv16f16_nxv16f64 = fpext <vscale x 16 x half> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv16f32_nxv16f64 = fpext <vscale x 16 x float> undef to <vscale x 16 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %vp_nxv16f16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %vp_nxv16f16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv16f32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %nxv32f16_nxv32f32 = fpext <vscale x 32 x half> undef to <vscale x 32 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %nxv32f16_nxv32f64 = fpext <vscale x 32 x half> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv32f32_nxv32f64 = fpext <vscale x 32 x float> undef to <vscale x 32 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %vp_nxv32f16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fpext.nxv32f32.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %vp_nxv32f16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv32f32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %nxv64f16_nxv64f32 = fpext <vscale x 64 x half> undef to <vscale x 64 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %nxv64f16_nxv64f64 = fpext <vscale x 64 x half> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %nxv64f32_nxv64f64 = fpext <vscale x 64 x float> undef to <vscale x 64 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %vp_nxv64f16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fpext.nxv64f32.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %vp_nxv64f16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %vp_nxv64f32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv2f16_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv2f32_nxv2f64 = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %vp_nxv4f16_nxv4f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %vp_nxv4f16_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv4f32_nxv4f64 = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %nxv8f16_nxv8f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv8f32_nxv8f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %vp_nxv8f16_nxv8f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %vp_nxv8f16_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv8f32_nxv8f64 = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %nxv16f16_nxv16f32 = fpext <vscale x 16 x half> undef to <vscale x 16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %nxv16f16_nxv16f64 = fpext <vscale x 16 x half> undef to <vscale x 16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv16f32_nxv16f64 = fpext <vscale x 16 x float> undef to <vscale x 16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %vp_nxv16f16_nxv16f32 = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %vp_nxv16f16_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv16f32_nxv16f64 = call <vscale x 16 x double> @llvm.vp.fpext.nxv16f64.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %nxv32f16_nxv32f32 = fpext <vscale x 32 x half> undef to <vscale x 32 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %nxv32f16_nxv32f64 = fpext <vscale x 32 x half> undef to <vscale x 32 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv32f32_nxv32f64 = fpext <vscale x 32 x float> undef to <vscale x 32 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %vp_nxv32f16_nxv32f32 = call <vscale x 32 x float> @llvm.vp.fpext.nxv32f32.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %vp_nxv32f16_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f16(<vscale x 32 x half> undef, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv32f32_nxv32f64 = call <vscale x 32 x double> @llvm.vp.fpext.nxv32f64.nxv32f32(<vscale x 32 x float> undef, <vscale x 32 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %nxv64f16_nxv64f32 = fpext <vscale x 64 x half> undef to <vscale x 64 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %nxv64f16_nxv64f64 = fpext <vscale x 64 x half> undef to <vscale x 64 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %nxv64f32_nxv64f64 = fpext <vscale x 64 x float> undef to <vscale x 64 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %vp_nxv64f16_nxv64f32 = call <vscale x 64 x float> @llvm.vp.fpext.nxv64f32.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %vp_nxv64f16_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f16(<vscale x 64 x half> undef, <vscale x 64 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %vp_nxv64f32_nxv64f64 = call <vscale x 64 x double> @llvm.vp.fpext.nxv64f64.nxv64f32(<vscale x 64 x float> undef, <vscale x 64 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2f16_v2f32 = fpext <2 x half> undef to <2 x float>
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
index 8ae387f48ccaac..21508715c0e4f5 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll
@@ -238,7 +238,7 @@ define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 
 define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
@@ -254,7 +254,7 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 
 define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret float %red
 ;
@@ -358,7 +358,7 @@ define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 
 define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv2f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
@@ -374,7 +374,7 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 
 define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
@@ -418,7 +418,7 @@ define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 
 define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_fwadd_nxv4f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;
@@ -434,7 +434,7 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 
 define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret double %red
 ;