[llvm] [AArch64] Fix SVE cost model for various math intrinsics (PR #184358)

Tue Mar 3 07:19:26 PST 2026

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: David Sherwood (david-arm)

<details>
<summary>Changes</summary>

The implementation of getIntrinsicInstrCost in BasicTTIImpl
assumes, for some intrinsics, that if the equivalent DAG node
uses custom lowering then the cost needs to be 2, instead of
the 1 used for legal ops. However, even though we
use custom lowering for these scalable vector operations
when SVE is available, we still end up generating the same
efficient codegen as fixed-width. This patch deals with a
few obvious intrinsics that we know get lowered to something
sensible, and returns the same cost for them as for NEON, i.e. 1.

---

Patch is 138.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/184358.diff


5 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+24) 
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll (+371-6) 
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-math.ll (+1-1) 
- (modified) llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll (+18-18) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll (+168-148) 


``````````diff

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ff957e0582aad..7f03d151bca5c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -696,6 +696,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     break;
   }
   case Intrinsic::abs: {
+    if (isa<ScalableVectorType>(RetTy) && ST->hasSVE()) {
+      auto LT = getTypeLegalizationCost(RetTy);
+      return LT.first;
+    }
+
+    // TODO: Why can't we just always return
+    // getTypeLegalizationCost(RetTy).first for all types?
     static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                      MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                      MVT::v2i64};
@@ -1114,6 +1121,23 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
       return getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
     break;
   }
+  case Intrinsic::sqrt:
+  case Intrinsic::fabs:
+  case Intrinsic::ceil:
+  case Intrinsic::floor:
+  case Intrinsic::nearbyint:
+  case Intrinsic::round:
+  case Intrinsic::rint:
+  case Intrinsic::roundeven:
+  case Intrinsic::trunc:
+  case Intrinsic::minnum:
+  case Intrinsic::maxnum: {
+    if (isa<ScalableVectorType>(RetTy) && ST->hasSVE()) {
+      auto LT = getTypeLegalizationCost(RetTy);
+      return LT.first;
+    }
+    break;
+  }
   default:
     break;
   }
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 0836ef1b352c9..ede0e45e7c5bb 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -182,10 +182,10 @@ define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 2 for: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 4 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 3 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 2 for: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 4 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 3 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'reductions'
@@ -221,10 +221,10 @@ define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 2 for: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 4 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 3 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 2 for: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 4 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 3 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'reductions'
@@ -260,10 +260,10 @@ define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 2 for: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 4 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 3 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 2 for: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 4 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 3 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> poison)
@@ -1447,6 +1447,371 @@ define void @match() #3 {
   ret void
 }
 
+define void @sqrt() #1 {
+; CHECK-VSCALE-1-LABEL: 'sqrt'
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %sqrt.nxv8f16 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %sqrt.nxv4f32 = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %sqrt.nxv2f64 = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 2 for: %sqrt.nxv4f64 = call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-VSCALE-2-LABEL: 'sqrt'
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %sqrt.nxv8f16 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %sqrt.nxv4f32 = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %sqrt.nxv2f64 = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 2 for: %sqrt.nxv4f64 = call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'sqrt'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %sqrt.nxv8f16 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %sqrt.nxv4f32 = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %sqrt.nxv2f64 = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 2 for: %sqrt.nxv4f64 = call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %sqrt.nxv8f16 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> poison)
+  %sqrt.nxv4f32 = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> poison)
+  %sqrt.nxv2f64 = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> poison)
+  %sqrt.nxv4f64 = call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> poison)
+
+  ret void
+}
+
+define void @fabs() #1 {
+; CHECK-VSCALE-1-LABEL: 'fabs'
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %fabs.nxv8f16 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %fabs.nxv4f32 = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %fabs.nxv2f64 = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 2 for: %fabs.nxv4f64 = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-VSCALE-2-LABEL: 'fabs'
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %fabs.nxv8f16 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %fabs.nxv4f32 = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %fabs.nxv2f64 = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 2 for: %fabs.nxv4f64 = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'fabs'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %fabs.nxv8f16 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %fabs.nxv4f32 = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %fabs.nxv2f64 = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 2 for: %fabs.nxv4f64 = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %fabs.nxv8f16 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> poison)
+  %fabs.nxv4f32 = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> poison)
+  %fabs.nxv2f64 = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> poison)
+  %fabs.nxv4f64 = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> poison)
+
+  ret void
+}
+
+define void @ceil() #1 {
+; CHECK-VSCALE-1-LABEL: 'ceil'
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %ceil.nxv8f16 = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %ceil.nxv4f32 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %ceil.nxv2f64 = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 2 for: %ceil.nxv4f64 = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-VSCALE-2-LABEL: 'ceil'
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %ceil.nxv8f16 = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %ceil.nxv4f32 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %ceil.nxv2f64 = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 2 for: %ceil.nxv4f64 = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'ceil'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %ceil.nxv8f16 = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %ceil.nxv4f32 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %ceil.nxv2f64 = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 2 for: %ceil.nxv4f64 = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %ceil.nxv8f16 = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> poison)
+  %ceil.nxv4f32 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> poison)
+  %ceil.nxv2f64 = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> poison)
+  %ceil.nxv4f64 = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> poison)
+
+  ret void
+}
+
+define void @floor() #1 {
+; CHECK-VSCALE-1-LABEL: 'floor'
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %floor.nxv8f16 = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %floor.nxv4f32 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %floor.nxv2f64 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 2 for: %floor.nxv4f64 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-VSCALE-2-LABEL: 'floor'
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %floor.nxv8f16 = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %floor.nxv4f32 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %floor.nxv2f64 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 2 for: %floor.nxv4f64 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'floor'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %floor.nxv8f16 = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %floor.nxv4f32 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %floor.nxv2f64 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 2 for: %floor.nxv4f64 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %floor.nxv8f16 = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> poison)
+  %floor.nxv4f32 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> poison)
+  %floor.nxv2f64 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> poison)
+  %floor.nxv4f64 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> poison)
+
+  ret void
+}
+
+define void @nearbyint() #1 {
+; CHECK-VSCALE-1-LABEL: 'nearbyint'
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %nearbyint.nxv8f16 = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %nearbyint.nxv4f32 = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %nearbyint.nxv2f64 = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 2 for: %nearbyint.nxv4f64 = call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> poison)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-VSCALE-2-LABEL: 'nearbyint'
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %nearbyint.nxv8f16 = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %nearbyint.nxv4f32 = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 1 for: %nearbyint.nxv2f64 = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 2 for: %nearbyint.nxv4f64 = call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> poison)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; TYPE_BASED_ONLY-LABEL: 'nearbyint'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %nearbyint.nxv8f16 = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %nearbyint.nxv4f32 = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 1 for: %nearbyint.nxv2f64 = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 2 for: %nearbyint.nxv4f64 = call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> poison)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %nearbyint.nxv8f16 = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> poison)
+  %nearbyint.nxv4f32 = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> poison)
+  %nearbyint.nxv2f64 = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> poison)
+  %nearbyint.nxv4f64 = call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> poison)
+
+  ret void
+}
+
+define void @round() #1 {
+; CHECK-VSCALE-1-LABEL: 'round'
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 1 for: %round.nxv8f16 = call <v...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/184358