[llvm] a6b7f42 - [CodeGen] Teach ReplaceWithVeclib to use correct calling convention (#180773)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 17 02:36:44 PST 2026


Author: David Sherwood
Date: 2026-02-17T10:36:40Z
New Revision: a6b7f42bc802dbc15940a829f8e52209385d4283

URL: https://github.com/llvm/llvm-project/commit/a6b7f42bc802dbc15940a829f8e52209385d4283
DIFF: https://github.com/llvm/llvm-project/commit/a6b7f42bc802dbc15940a829f8e52209385d4283.diff

LOG: [CodeGen] Teach ReplaceWithVeclib to use correct calling convention (#180773)

We correctly replace calls to the LLVM intrinsic with the vector
math variant, but fail to also copy the calling convention. This
is important for veclibs such as ArmPL where the aarch64_vector_pcs
convention reduces the number of registers that need saving
across calls.

Added: 
    

Modified: 
    llvm/lib/CodeGen/ReplaceWithVeclib.cpp
    llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
    llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 38c7cf93451cf..407d6d503cb10 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -48,6 +48,7 @@ STATISTIC(NumFuncUsedAdded,
 /// Function.
 Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
                          const StringRef TLIName,
+                         std::optional<CallingConv::ID> CC,
                          Function *ScalarFunc = nullptr) {
   Function *TLIFunc = M->getFunction(TLIName);
   if (!TLIFunc) {
@@ -55,6 +56,8 @@ Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
         Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M);
     if (ScalarFunc)
       TLIFunc->copyAttributesFrom(ScalarFunc);
+    if (CC)
+      TLIFunc->setCallingConv(*CC);
 
     LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
                       << TLIName << "` of type `" << *(TLIFunc->getType())
@@ -93,6 +96,7 @@ static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info,
   // Preserve fast math flags for FP math.
   if (isa<FPMathOperator>(Replacement))
     Replacement->copyFastMathFlags(II);
+  Replacement->setCallingConv(TLIVecFunc->getCallingConv());
 }
 
 /// Returns true when successfully replaced \p II, which is a call to a
@@ -196,7 +200,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
 
   Function *TLIFunc =
       getTLIFunction(II->getModule(), VectorFTy, VD->getVectorFnName(),
-                     II->getCalledFunction());
+                     VD->getCallingConv(), II->getCalledFunction());
   replaceWithTLIFunction(II, *OptInfo, TLIFunc);
   LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << ScalarName
                     << "` with call to `" << TLIFunc->getName() << "`.\n");

diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
index 8a0ac6d4ace7a..cd923910a91c9 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
@@ -3,12 +3,6 @@
 
 target triple = "aarch64-unknown-linux-gnu"
 
-;
-; The replace-with-veclib pass does not work with scalable types, thus
-; the mappings aren't utilised. Tests will need to be regenerated when the
-; pass is improved.
-;
-
 declare <2 x double> @llvm.cos.v2f64(<2 x double>)
 declare <4 x float> @llvm.cos.v4f32(<4 x float>)
 declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
@@ -21,7 +15,7 @@ declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_cos_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_cos_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vcosq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vcosq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %in)
@@ -31,7 +25,7 @@ define <2 x double> @llvm_cos_f64(<2 x double> %in) {
 define <4 x float> @llvm_cos_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_cos_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vcosq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vcosq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %in)
@@ -66,7 +60,7 @@ declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_exp_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_exp_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vexpq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vexpq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %in)
@@ -76,7 +70,7 @@ define <2 x double> @llvm_exp_f64(<2 x double> %in) {
 define <4 x float> @llvm_exp_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_exp_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vexpq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vexpq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %in)
@@ -111,7 +105,7 @@ declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_exp10_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
@@ -121,7 +115,7 @@ define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
 define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_exp10_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
@@ -156,7 +150,7 @@ declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_exp2_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp2q_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vexp2q_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in)
@@ -166,7 +160,7 @@ define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
 define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_exp2_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp2q_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vexp2q_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
@@ -201,7 +195,7 @@ declare <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_log_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_log_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vlogq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vlogq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %in)
@@ -211,7 +205,7 @@ define <2 x double> @llvm_log_f64(<2 x double> %in) {
 define <4 x float> @llvm_log_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_log_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vlogq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vlogq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %in)
@@ -246,7 +240,7 @@ declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_log10_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_log10_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog10q_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vlog10q_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in)
@@ -256,7 +250,7 @@ define <2 x double> @llvm_log10_f64(<2 x double> %in) {
 define <4 x float> @llvm_log10_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_log10_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog10q_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vlog10q_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in)
@@ -291,7 +285,7 @@ declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_log2_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_log2_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog2q_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vlog2q_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in)
@@ -301,7 +295,7 @@ define <2 x double> @llvm_log2_f64(<2 x double> %in) {
 define <4 x float> @llvm_log2_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_log2_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog2q_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vlog2q_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in)
@@ -336,7 +330,7 @@ declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4
 define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
 ; CHECK-LABEL: define <2 x double> @llvm_pow_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]], <2 x double> [[POWER:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vpowq_f64(<2 x double> [[IN]], <2 x double> [[POWER]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vpowq_f64(<2 x double> [[IN]], <2 x double> [[POWER]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %power)
@@ -346,7 +340,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
 define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
 ; CHECK-LABEL: define <4 x float> @llvm_pow_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]], <4 x float> [[POWER:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vpowq_f32(<4 x float> [[IN]], <4 x float> [[POWER]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vpowq_f32(<4 x float> [[IN]], <4 x float> [[POWER]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %power)
@@ -381,7 +375,7 @@ declare <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_sin_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_sin_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vsinq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vsinq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in)
@@ -391,7 +385,7 @@ define <2 x double> @llvm_sin_f64(<2 x double> %in) {
 define <4 x float> @llvm_sin_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_sin_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vsinq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vsinq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in)
@@ -426,7 +420,7 @@ declare <vscale x 4 x float> @llvm.tan.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_tan_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_tan_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vtanq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vtanq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %in)
@@ -436,7 +430,7 @@ define <2 x double> @llvm_tan_f64(<2 x double> %in) {
 define <4 x float> @llvm_tan_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_tan_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vtanq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vtanq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %in)
@@ -471,7 +465,7 @@ declare <vscale x 4 x float> @llvm.acos.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_acos_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_acos_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vacosq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vacosq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.acos.v2f64(<2 x double> %in)
@@ -481,7 +475,7 @@ define <2 x double> @llvm_acos_f64(<2 x double> %in) {
 define <4 x float> @llvm_acos_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_acos_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vacosq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vacosq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.acos.v4f32(<4 x float> %in)
@@ -516,7 +510,7 @@ declare <vscale x 4 x float> @llvm.asin.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_asin_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_asin_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vasinq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vasinq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.asin.v2f64(<2 x double> %in)
@@ -526,7 +520,7 @@ define <2 x double> @llvm_asin_f64(<2 x double> %in) {
 define <4 x float> @llvm_asin_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_asin_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vasinq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vasinq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.asin.v4f32(<4 x float> %in)
@@ -561,7 +555,7 @@ declare <vscale x 4 x float> @llvm.atan.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_atan_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_atan_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vatanq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vatanq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.atan.v2f64(<2 x double> %in)
@@ -571,7 +565,7 @@ define <2 x double> @llvm_atan_f64(<2 x double> %in) {
 define <4 x float> @llvm_atan_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_atan_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vatanq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vatanq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.atan.v4f32(<4 x float> %in)
@@ -606,7 +600,7 @@ declare <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float>, <vscale x
 define <2 x double> @llvm_atan2_f64(<2 x double> %in1, <2 x double> %in2) {
 ; CHECK-LABEL: define <2 x double> @llvm_atan2_f64
 ; CHECK-SAME: (<2 x double> [[IN1:%.*]], <2 x double> [[IN2:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vatan2q_f64(<2 x double> [[IN1]], <2 x double> [[IN2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vatan2q_f64(<2 x double> [[IN1]], <2 x double> [[IN2]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.atan2.v2f64(<2 x double> %in1, <2 x double> %in2)
@@ -616,7 +610,7 @@ define <2 x double> @llvm_atan2_f64(<2 x double> %in1, <2 x double> %in2) {
 define <4 x float> @llvm_atan2_f32(<4 x float> %in1, <4 x float> %in2) {
 ; CHECK-LABEL: define <4 x float> @llvm_atan2_f32
 ; CHECK-SAME: (<4 x float> [[IN1:%.*]], <4 x float> [[IN2:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vatan2q_f32(<4 x float> [[IN1]], <4 x float> [[IN2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vatan2q_f32(<4 x float> [[IN1]], <4 x float> [[IN2]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %in1, <4 x float> %in2)
@@ -651,7 +645,7 @@ declare <vscale x 4 x float> @llvm.cosh.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_cosh_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vcoshq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vcoshq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.cosh.v2f64(<2 x double> %in)
@@ -661,7 +655,7 @@ define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
 define <4 x float> @llvm_cosh_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_cosh_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vcoshq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vcoshq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.cosh.v4f32(<4 x float> %in)
@@ -696,7 +690,7 @@ declare <vscale x 4 x float> @llvm.sinh.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_sinh_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_sinh_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vsinhq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vsinhq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.sinh.v2f64(<2 x double> %in)
@@ -706,7 +700,7 @@ define <2 x double> @llvm_sinh_f64(<2 x double> %in) {
 define <4 x float> @llvm_sinh_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_sinh_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vsinhq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vsinhq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.sinh.v4f32(<4 x float> %in)
@@ -741,7 +735,7 @@ declare <vscale x 4 x float> @llvm.tanh.nxv4f32(<vscale x 4 x float>)
 define <2 x double> @llvm_tanh_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_tanh_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vtanhq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @armpl_vtanhq_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.tanh.v2f64(<2 x double> %in)
@@ -751,7 +745,7 @@ define <2 x double> @llvm_tanh_f64(<2 x double> %in) {
 define <4 x float> @llvm_tanh_f32(<4 x float> %in) {
 ; CHECK-LABEL: define <4 x float> @llvm_tanh_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vtanhq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @armpl_vtanhq_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.tanh.v4f32(<4 x float> %in)

diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll
index 6323d942a08e7..bd2419a8a7e58 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll
@@ -44,7 +44,7 @@ define <4 x float> @llvm_copysign_f32(<4 x float> %mag, <4 x float> %sgn) {
 
 define <2 x double> @llvm_cos_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_cos_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_cos(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %in)
@@ -53,7 +53,7 @@ define <2 x double> @llvm_cos_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_cos_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_cos_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_cosf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_cosf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %in)
@@ -62,7 +62,7 @@ define <4 x float> @llvm_cos_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_exp_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_exp_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %in)
@@ -71,7 +71,7 @@ define <2 x double> @llvm_exp_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_exp_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_exp_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_expf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_expf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %in)
@@ -80,7 +80,7 @@ define <4 x float> @llvm_exp_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_exp10_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
@@ -89,7 +89,7 @@ define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_exp10_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
@@ -98,7 +98,7 @@ define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_exp2_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp2(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in)
@@ -107,7 +107,7 @@ define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_exp2_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
@@ -170,7 +170,7 @@ define <4 x float> @llvm_fma_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
 
 define <2 x double> @llvm_log_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_log_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %in)
@@ -179,7 +179,7 @@ define <2 x double> @llvm_log_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_log_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_log_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_logf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_logf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %in)
@@ -188,7 +188,7 @@ define <4 x float> @llvm_log_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_log10_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_log10_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log10(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in)
@@ -197,7 +197,7 @@ define <2 x double> @llvm_log10_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_log10_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_log10_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_log10f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_log10f(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in)
@@ -206,7 +206,7 @@ define <4 x float> @llvm_log10_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_log2_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_log2_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log2(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in)
@@ -215,7 +215,7 @@ define <2 x double> @llvm_log2_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_log2_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_log2_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_log2f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_log2f(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in)
@@ -278,7 +278,7 @@ define <4 x float> @llvm_nearbyint_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
 ; CHECK-LABEL: @llvm_pow_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %pow)
@@ -287,7 +287,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
 
 define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %pow) {
 ; CHECK-LABEL: @llvm_pow_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %pow)
@@ -332,7 +332,7 @@ define <4 x float> @llvm_round_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_sin_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_sin_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_sin(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in)
@@ -341,7 +341,7 @@ define <2 x double> @llvm_sin_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_sin_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_sin_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_sinf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in)
@@ -368,7 +368,7 @@ define <4 x float> @llvm_sqrt_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_tan_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_tan_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_tan(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %in)
@@ -377,7 +377,7 @@ define <2 x double> @llvm_tan_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_tan_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_tan_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_tanf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %in)
@@ -386,7 +386,7 @@ define <4 x float> @llvm_tan_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_acos_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_acos_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_acos(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.acos.v2f64(<2 x double> %in)
@@ -395,7 +395,7 @@ define <2 x double> @llvm_acos_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_acos_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_acos_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_acosf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_acosf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.acos.v4f32(<4 x float> %in)
@@ -404,7 +404,7 @@ define <4 x float> @llvm_acos_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_asin_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_asin_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_asin(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.asin.v2f64(<2 x double> %in)
@@ -413,7 +413,7 @@ define <2 x double> @llvm_asin_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_asin_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_asin_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_asinf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.asin.v4f32(<4 x float> %in)
@@ -422,7 +422,7 @@ define <4 x float> @llvm_asin_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_atan_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_atan_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_atan(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.atan.v2f64(<2 x double> %in)
@@ -431,7 +431,7 @@ define <2 x double> @llvm_atan_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_atan_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_atan_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_atanf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.atan.v4f32(<4 x float> %in)
@@ -440,7 +440,7 @@ define <4 x float> @llvm_atan_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_atan2_f64(<2 x double> %x, <2 x double> %y) {
 ; CHECK-LABEL: @llvm_atan2_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[INX:%.*]], <2 x double> [[INY:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.atan2.v2f64(<2 x double> %x, <2 x double> %y)
@@ -449,7 +449,7 @@ define <2 x double> @llvm_atan2_f64(<2 x double> %x, <2 x double> %y) {
 
 define <4 x float> @llvm_atan2_f32(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: @llvm_atan2_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[INX:%.*]], <4 x float> [[INY:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %x, <4 x float> %y)
@@ -458,7 +458,7 @@ define <4 x float> @llvm_atan2_f32(<4 x float> %x, <4 x float> %y) {
 
 define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_cosh_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_cosh(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.cosh.v2f64(<2 x double> %in)
@@ -467,7 +467,7 @@ define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_cosh_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_cosh_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_coshf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_coshf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.cosh.v4f32(<4 x float> %in)
@@ -476,7 +476,7 @@ define <4 x float> @llvm_cosh_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_sinh_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_sinh_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_sinh(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.sinh.v2f64(<2 x double> %in)
@@ -485,7 +485,7 @@ define <2 x double> @llvm_sinh_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_sinh_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_sinh_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.sinh.v4f32(<4 x float> %in)
@@ -494,7 +494,7 @@ define <4 x float> @llvm_sinh_f32(<4 x float> %in) {
 
 define <2 x double> @llvm_tanh_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_tanh_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_tanh(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = call fast <2 x double> @llvm.tanh.v2f64(<2 x double> %in)
@@ -503,7 +503,7 @@ define <2 x double> @llvm_tanh_f64(<2 x double> %in) {
 
 define <4 x float> @llvm_tanh_f32(<4 x float> %in) {
 ; CHECK-LABEL: @llvm_tanh_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %1 = call fast <4 x float> @llvm.tanh.v4f32(<4 x float> %in)
@@ -573,5 +573,6 @@ declare <4 x float> @llvm.tan.v4f32(<4 x float>)
 declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
 ;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.


        


More information about the llvm-commits mailing list