[llvm] [CodeGen] Teach ReplaceWithVeclib to use correct calling convention (PR #180773)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 12 05:55:54 PST 2026
https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/180773
>From 69a42ab4dc909598fd42c2b601b7860f541e64cf Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 11 Feb 2026 15:29:24 +0000
Subject: [PATCH 1/2] [CodeGen] Teach ReplaceWithVeclib to use correct calling
convention
We correctly replace calls to the LLVM intrinsic with the vector
math variant, but fail to also copy the calling convention. This
is important for veclibs such as ArmPL, where the aarch64_vector_pcs
convention preserves more registers across the call, reducing the
number of caller-saved registers that need spilling at call sites.
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 6 +-
.../AArch64/replace-with-veclib-armpl.ll | 68 +++++++++---------
.../AArch64/replace-with-veclib-libmvec.ll | 71 ++++++++++---------
3 files changed, 75 insertions(+), 70 deletions(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 38c7cf93451cf..407d6d503cb10 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -48,6 +48,7 @@ STATISTIC(NumFuncUsedAdded,
/// Function.
Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
const StringRef TLIName,
+ std::optional<CallingConv::ID> CC,
Function *ScalarFunc = nullptr) {
Function *TLIFunc = M->getFunction(TLIName);
if (!TLIFunc) {
@@ -55,6 +56,8 @@ Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M);
if (ScalarFunc)
TLIFunc->copyAttributesFrom(ScalarFunc);
+ if (CC)
+ TLIFunc->setCallingConv(*CC);
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
<< TLIName << "` of type `" << *(TLIFunc->getType())
@@ -93,6 +96,7 @@ static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info,
// Preserve fast math flags for FP math.
if (isa<FPMathOperator>(Replacement))
Replacement->copyFastMathFlags(II);
+ Replacement->setCallingConv(TLIVecFunc->getCallingConv());
}
/// Returns true when successfully replaced \p II, which is a call to a
@@ -196,7 +200,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
Function *TLIFunc =
getTLIFunction(II->getModule(), VectorFTy, VD->getVectorFnName(),
- II->getCalledFunction());
+ VD->getCallingConv(), II->getCalledFunction());
replaceWithTLIFunction(II, *OptInfo, TLIFunc);
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << ScalarName
<< "` with call to `" << TLIFunc->getName() << "`.\n");
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
index 8a0ac6d4ace7a..16ca55983dae4 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
@@ -21,7 +21,7 @@ declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_cos_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vcosq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vcosq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %in)
@@ -31,7 +31,7 @@ define <2 x double> @llvm_cos_f64(<2 x double> %in) {
define <4 x float> @llvm_cos_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_cos_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vcosq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vcosq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %in)
@@ -66,7 +66,7 @@ declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_exp_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_exp_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexpq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vexpq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %in)
@@ -76,7 +76,7 @@ define <2 x double> @llvm_exp_f64(<2 x double> %in) {
define <4 x float> @llvm_exp_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_exp_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexpq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vexpq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %in)
@@ -111,7 +111,7 @@ declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_exp10_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
@@ -121,7 +121,7 @@ define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_exp10_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
@@ -156,7 +156,7 @@ declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_exp2_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp2q_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vexp2q_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in)
@@ -166,7 +166,7 @@ define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_exp2_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp2q_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vexp2q_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
@@ -201,7 +201,7 @@ declare <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_log_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_log_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vlogq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vlogq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %in)
@@ -211,7 +211,7 @@ define <2 x double> @llvm_log_f64(<2 x double> %in) {
define <4 x float> @llvm_log_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_log_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vlogq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vlogq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %in)
@@ -246,7 +246,7 @@ declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_log10_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_log10_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog10q_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vlog10q_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in)
@@ -256,7 +256,7 @@ define <2 x double> @llvm_log10_f64(<2 x double> %in) {
define <4 x float> @llvm_log10_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_log10_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog10q_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vlog10q_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in)
@@ -291,7 +291,7 @@ declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_log2_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_log2_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog2q_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vlog2q_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in)
@@ -301,7 +301,7 @@ define <2 x double> @llvm_log2_f64(<2 x double> %in) {
define <4 x float> @llvm_log2_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_log2_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog2q_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vlog2q_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in)
@@ -336,7 +336,7 @@ declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4
define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
; CHECK-LABEL: define <2 x double> @llvm_pow_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]], <2 x double> [[POWER:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vpowq_f64(<2 x double> [[IN]], <2 x double> [[POWER]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vpowq_f64(<2 x double> [[IN]], <2 x double> [[POWER]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %power)
@@ -346,7 +346,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
; CHECK-LABEL: define <4 x float> @llvm_pow_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]], <4 x float> [[POWER:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vpowq_f32(<4 x float> [[IN]], <4 x float> [[POWER]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vpowq_f32(<4 x float> [[IN]], <4 x float> [[POWER]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %power)
@@ -381,7 +381,7 @@ declare <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_sin_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_sin_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vsinq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vsinq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in)
@@ -391,7 +391,7 @@ define <2 x double> @llvm_sin_f64(<2 x double> %in) {
define <4 x float> @llvm_sin_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_sin_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vsinq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vsinq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in)
@@ -426,7 +426,7 @@ declare <vscale x 4 x float> @llvm.tan.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_tan_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_tan_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vtanq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vtanq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %in)
@@ -436,7 +436,7 @@ define <2 x double> @llvm_tan_f64(<2 x double> %in) {
define <4 x float> @llvm_tan_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_tan_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vtanq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vtanq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %in)
@@ -471,7 +471,7 @@ declare <vscale x 4 x float> @llvm.acos.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_acos_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_acos_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vacosq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vacosq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.acos.v2f64(<2 x double> %in)
@@ -481,7 +481,7 @@ define <2 x double> @llvm_acos_f64(<2 x double> %in) {
define <4 x float> @llvm_acos_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_acos_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vacosq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vacosq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.acos.v4f32(<4 x float> %in)
@@ -516,7 +516,7 @@ declare <vscale x 4 x float> @llvm.asin.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_asin_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_asin_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vasinq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vasinq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.asin.v2f64(<2 x double> %in)
@@ -526,7 +526,7 @@ define <2 x double> @llvm_asin_f64(<2 x double> %in) {
define <4 x float> @llvm_asin_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_asin_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vasinq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vasinq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.asin.v4f32(<4 x float> %in)
@@ -561,7 +561,7 @@ declare <vscale x 4 x float> @llvm.atan.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_atan_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_atan_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vatanq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vatanq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.atan.v2f64(<2 x double> %in)
@@ -571,7 +571,7 @@ define <2 x double> @llvm_atan_f64(<2 x double> %in) {
define <4 x float> @llvm_atan_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_atan_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vatanq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vatanq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.atan.v4f32(<4 x float> %in)
@@ -606,7 +606,7 @@ declare <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float>, <vscale x
define <2 x double> @llvm_atan2_f64(<2 x double> %in1, <2 x double> %in2) {
; CHECK-LABEL: define <2 x double> @llvm_atan2_f64
; CHECK-SAME: (<2 x double> [[IN1:%.*]], <2 x double> [[IN2:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vatan2q_f64(<2 x double> [[IN1]], <2 x double> [[IN2]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vatan2q_f64(<2 x double> [[IN1]], <2 x double> [[IN2]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.atan2.v2f64(<2 x double> %in1, <2 x double> %in2)
@@ -616,7 +616,7 @@ define <2 x double> @llvm_atan2_f64(<2 x double> %in1, <2 x double> %in2) {
define <4 x float> @llvm_atan2_f32(<4 x float> %in1, <4 x float> %in2) {
; CHECK-LABEL: define <4 x float> @llvm_atan2_f32
; CHECK-SAME: (<4 x float> [[IN1:%.*]], <4 x float> [[IN2:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vatan2q_f32(<4 x float> [[IN1]], <4 x float> [[IN2]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vatan2q_f32(<4 x float> [[IN1]], <4 x float> [[IN2]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %in1, <4 x float> %in2)
@@ -651,7 +651,7 @@ declare <vscale x 4 x float> @llvm.cosh.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_cosh_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vcoshq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vcoshq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.cosh.v2f64(<2 x double> %in)
@@ -661,7 +661,7 @@ define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
define <4 x float> @llvm_cosh_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_cosh_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vcoshq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vcoshq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.cosh.v4f32(<4 x float> %in)
@@ -696,7 +696,7 @@ declare <vscale x 4 x float> @llvm.sinh.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_sinh_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_sinh_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vsinhq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vsinhq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.sinh.v2f64(<2 x double> %in)
@@ -706,7 +706,7 @@ define <2 x double> @llvm_sinh_f64(<2 x double> %in) {
define <4 x float> @llvm_sinh_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_sinh_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vsinhq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vsinhq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.sinh.v4f32(<4 x float> %in)
@@ -741,7 +741,7 @@ declare <vscale x 4 x float> @llvm.tanh.nxv4f32(<vscale x 4 x float>)
define <2 x double> @llvm_tanh_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_tanh_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vtanhq_f64(<2 x double> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vtanhq_f64(<2 x double> [[IN]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.tanh.v2f64(<2 x double> %in)
@@ -751,7 +751,7 @@ define <2 x double> @llvm_tanh_f64(<2 x double> %in) {
define <4 x float> @llvm_tanh_f32(<4 x float> %in) {
; CHECK-LABEL: define <4 x float> @llvm_tanh_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vtanhq_f32(<4 x float> [[IN]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vtanhq_f32(<4 x float> [[IN]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.tanh.v4f32(<4 x float> %in)
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll
index 6323d942a08e7..bd2419a8a7e58 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll
@@ -44,7 +44,7 @@ define <4 x float> @llvm_copysign_f32(<4 x float> %mag, <4 x float> %sgn) {
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_cos_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_cos(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %in)
@@ -53,7 +53,7 @@ define <2 x double> @llvm_cos_f64(<2 x double> %in) {
define <4 x float> @llvm_cos_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_cos_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_cosf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_cosf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %in)
@@ -62,7 +62,7 @@ define <4 x float> @llvm_cos_f32(<4 x float> %in) {
define <2 x double> @llvm_exp_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_exp_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %in)
@@ -71,7 +71,7 @@ define <2 x double> @llvm_exp_f64(<2 x double> %in) {
define <4 x float> @llvm_exp_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_exp_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_expf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_expf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %in)
@@ -80,7 +80,7 @@ define <4 x float> @llvm_exp_f32(<4 x float> %in) {
define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_exp10_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
@@ -89,7 +89,7 @@ define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_exp10_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
@@ -98,7 +98,7 @@ define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_exp2_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp2(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in)
@@ -107,7 +107,7 @@ define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_exp2_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
@@ -170,7 +170,7 @@ define <4 x float> @llvm_fma_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
define <2 x double> @llvm_log_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_log_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %in)
@@ -179,7 +179,7 @@ define <2 x double> @llvm_log_f64(<2 x double> %in) {
define <4 x float> @llvm_log_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_log_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_logf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_logf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %in)
@@ -188,7 +188,7 @@ define <4 x float> @llvm_log_f32(<4 x float> %in) {
define <2 x double> @llvm_log10_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_log10_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log10(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in)
@@ -197,7 +197,7 @@ define <2 x double> @llvm_log10_f64(<2 x double> %in) {
define <4 x float> @llvm_log10_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_log10_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_log10f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_log10f(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in)
@@ -206,7 +206,7 @@ define <4 x float> @llvm_log10_f32(<4 x float> %in) {
define <2 x double> @llvm_log2_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_log2_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log2(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in)
@@ -215,7 +215,7 @@ define <2 x double> @llvm_log2_f64(<2 x double> %in) {
define <4 x float> @llvm_log2_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_log2_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_log2f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_log2f(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in)
@@ -278,7 +278,7 @@ define <4 x float> @llvm_nearbyint_f32(<4 x float> %in) {
define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
; CHECK-LABEL: @llvm_pow_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %pow)
@@ -287,7 +287,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %pow) {
; CHECK-LABEL: @llvm_pow_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %pow)
@@ -332,7 +332,7 @@ define <4 x float> @llvm_round_f32(<4 x float> %in) {
define <2 x double> @llvm_sin_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_sin_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_sin(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in)
@@ -341,7 +341,7 @@ define <2 x double> @llvm_sin_f64(<2 x double> %in) {
define <4 x float> @llvm_sin_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_sin_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_sinf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in)
@@ -368,7 +368,7 @@ define <4 x float> @llvm_sqrt_f32(<4 x float> %in) {
define <2 x double> @llvm_tan_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_tan_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_tan(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %in)
@@ -377,7 +377,7 @@ define <2 x double> @llvm_tan_f64(<2 x double> %in) {
define <4 x float> @llvm_tan_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_tan_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_tanf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %in)
@@ -386,7 +386,7 @@ define <4 x float> @llvm_tan_f32(<4 x float> %in) {
define <2 x double> @llvm_acos_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_acos_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_acos(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.acos.v2f64(<2 x double> %in)
@@ -395,7 +395,7 @@ define <2 x double> @llvm_acos_f64(<2 x double> %in) {
define <4 x float> @llvm_acos_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_acos_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_acosf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_acosf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.acos.v4f32(<4 x float> %in)
@@ -404,7 +404,7 @@ define <4 x float> @llvm_acos_f32(<4 x float> %in) {
define <2 x double> @llvm_asin_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_asin_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_asin(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.asin.v2f64(<2 x double> %in)
@@ -413,7 +413,7 @@ define <2 x double> @llvm_asin_f64(<2 x double> %in) {
define <4 x float> @llvm_asin_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_asin_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_asinf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.asin.v4f32(<4 x float> %in)
@@ -422,7 +422,7 @@ define <4 x float> @llvm_asin_f32(<4 x float> %in) {
define <2 x double> @llvm_atan_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_atan_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_atan(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.atan.v2f64(<2 x double> %in)
@@ -431,7 +431,7 @@ define <2 x double> @llvm_atan_f64(<2 x double> %in) {
define <4 x float> @llvm_atan_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_atan_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_atanf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.atan.v4f32(<4 x float> %in)
@@ -440,7 +440,7 @@ define <4 x float> @llvm_atan_f32(<4 x float> %in) {
define <2 x double> @llvm_atan2_f64(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @llvm_atan2_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[INX:%.*]], <2 x double> [[INY:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.atan2.v2f64(<2 x double> %x, <2 x double> %y)
@@ -449,7 +449,7 @@ define <2 x double> @llvm_atan2_f64(<2 x double> %x, <2 x double> %y) {
define <4 x float> @llvm_atan2_f32(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @llvm_atan2_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[INX:%.*]], <4 x float> [[INY:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %x, <4 x float> %y)
@@ -458,7 +458,7 @@ define <4 x float> @llvm_atan2_f32(<4 x float> %x, <4 x float> %y) {
define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_cosh_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_cosh(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.cosh.v2f64(<2 x double> %in)
@@ -467,7 +467,7 @@ define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
define <4 x float> @llvm_cosh_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_cosh_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_coshf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_coshf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.cosh.v4f32(<4 x float> %in)
@@ -476,7 +476,7 @@ define <4 x float> @llvm_cosh_f32(<4 x float> %in) {
define <2 x double> @llvm_sinh_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_sinh_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_sinh(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.sinh.v2f64(<2 x double> %in)
@@ -485,7 +485,7 @@ define <2 x double> @llvm_sinh_f64(<2 x double> %in) {
define <4 x float> @llvm_sinh_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_sinh_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.sinh.v4f32(<4 x float> %in)
@@ -494,7 +494,7 @@ define <4 x float> @llvm_sinh_f32(<4 x float> %in) {
define <2 x double> @llvm_tanh_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_tanh_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_tanh(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[IN:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.tanh.v2f64(<2 x double> %in)
@@ -503,7 +503,7 @@ define <2 x double> @llvm_tanh_f64(<2 x double> %in) {
define <4 x float> @llvm_tanh_f32(<4 x float> %in) {
; CHECK-LABEL: @llvm_tanh_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[IN:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.tanh.v4f32(<4 x float> %in)
@@ -573,5 +573,6 @@ declare <4 x float> @llvm.tan.v4f32(<4 x float>)
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
>From 58402c139747cf13a15bcfeb6daa4e094aa5b809 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Thu, 12 Feb 2026 13:55:04 +0000
Subject: [PATCH 2/2] Address review comment
---
llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll | 6 ------
1 file changed, 6 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
index 16ca55983dae4..cd923910a91c9 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
@@ -3,12 +3,6 @@
target triple = "aarch64-unknown-linux-gnu"
-;
-; The replace-with-veclib pass does not work with scalable types, thus
-; the mappings aren't utilised. Tests will need to be regenerated when the
-; pass is improved.
-;
-
declare <2 x double> @llvm.cos.v2f64(<2 x double>)
declare <4 x float> @llvm.cos.v4f32(<4 x float>)
declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
More information about the llvm-commits
mailing list