[llvm] [RISCV] Unify non-vp and vp rounding intrinsic costing (PR #147872)

Wed Jul 9 19:06:31 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

<details>
<summary>Changes</summary>

Currently we have slightly different costing for the vp and non-vp version of the rounding intrinsics.

We can delete this code and use the generic BasicTTIImpl code for the vp intrinsics which falls back to the non-vp versions.

I'm not sure if the zvfh costing is correct, this should probably be fixed in a follow up patch. At the moment the non-vp cost is more important since it is what the loop vectorizer will use.

---

Patch is 67.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147872.diff


2 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (-41) 
- (modified) llvm/test/Analysis/CostModel/RISCV/fround.ll (+165-165) 


``````````diff

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 67a51c12b508e..ac28c0e7a8cf2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1244,17 +1244,6 @@ static const CostTblEntry VectorIntrinsicCostTable[]{
     {Intrinsic::vp_cttz, MVT::i64, 25},
 };
 
-static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID) {
-  switch (ID) {
-#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)                                    \
-  case Intrinsic::VPID:                                                        \
-    return ISD::VPSD;
-#include "llvm/IR/VPIntrinsics.def"
-#undef HELPER_MAP_VPID_TO_VPSD
-  }
-  return ISD::DELETED_NODE;
-}
-
 InstructionCost
 RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                     TTI::TargetCostKind CostKind) const {
@@ -1482,36 +1471,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 
     return Cost;
   }
-  case Intrinsic::vp_rint: {
-    // RISC-V target uses at least 5 instructions to lower rounding intrinsics.
-    unsigned Cost = 5;
-    auto LT = getTypeLegalizationCost(RetTy);
-    if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
-      return Cost * LT.first;
-    break;
-  }
-  case Intrinsic::vp_nearbyint: {
-    // More one read and one write for fflags than vp_rint.
-    unsigned Cost = 7;
-    auto LT = getTypeLegalizationCost(RetTy);
-    if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
-      return Cost * LT.first;
-    break;
-  }
-  case Intrinsic::vp_ceil:
-  case Intrinsic::vp_floor:
-  case Intrinsic::vp_round:
-  case Intrinsic::vp_roundeven:
-  case Intrinsic::vp_roundtozero: {
-    // Rounding with static rounding mode needs two more instructions to
-    // swap/write FRM than vp_rint.
-    unsigned Cost = 7;
-    auto LT = getTypeLegalizationCost(RetTy);
-    unsigned VPISD = getISDForVPIntrinsicID(ICA.getID());
-    if (TLI->isOperationCustom(VPISD, LT.second))
-      return Cost * LT.first;
-    break;
-  }
   case Intrinsic::vp_select: {
     Intrinsic::ID IID = ICA.getID();
     std::optional<unsigned> FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID);
diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
index 94c09389cadec..602ef5c94d721 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fround.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -1126,23 +1126,23 @@ define void @vp_ceil() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %10 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %11 = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %20 = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %21 = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %22 = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call <2 x bfloat> @llvm.vp.ceil.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
@@ -1176,15 +1176,15 @@ define void @vp_ceil() {
 
 define void @vp_ceil_f16() {
 ; ZVFH-LABEL: 'vp_ceil_f16'
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> undef, i32 undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x i1> undef, i32 undef)
 ; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; ZVFHMIN-LABEL: 'vp_ceil_f16'
@@ -1222,23 +1222,23 @@ define void @vp_floor() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %10 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %11 = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %20 = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %21 = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %22 = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call <2 x bfloat> @llvm.vp.floor.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
@@ -1272,15 +1272,15 @@ define void @vp_floor() {
 
 define void @vp_floor_f16() {
 ; ZVFH-LABEL: 'vp_floor_f16'
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/147872