[llvm] c377ce1 - [AArch64][VecLib] Add libmvec support for AArch64 targets (#143696)

Tue Jun 17 03:07:49 PDT 2025

Author: Mary Kassayova
Date: 2025-06-17T11:07:43+01:00
New Revision: c377ce1216a8ce73c940d2366a7bf223790f43b4

URL: https://github.com/llvm/llvm-project/commit/c377ce1216a8ce73c940d2366a7bf223790f43b4
DIFF: https://github.com/llvm/llvm-project/commit/c377ce1216a8ce73c940d2366a7bf223790f43b4.diff

LOG: [AArch64][VecLib] Add libmvec support for AArch64 targets (#143696)

This patch adds support for the `libmvec` vector library on AArch64
targets. Currently, all `libmvec` functions in GLIBC version 2.40 are
supported. The full list of math functions enabled can be found
[here](https://github.com/bminor/glibc/blob/96abd59bf2a11ddd4e7ccaac840ec13c0b62d3ba/sysdeps/aarch64/fpu/Versions)
(up to GLIBC 2.40).

Previously, `libmvec` was only supported on x86_64 targets. Attempts to
use it on AArch64 resulted in the following error from Clang:
`unsupported option 'libmvec' for target 'aarch64'`.

Added: 
    llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec-scalable.ll
    llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/test/Driver/fveclib.c
    llvm/include/llvm/Analysis/VecFuncs.def
    llvm/lib/Analysis/TargetLibraryInfo.cpp
    llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
    llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
    llvm/test/Transforms/Util/add-TLI-mappings.ll

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d32d3921b74fc..03641f5d0ea0d 100644

--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -359,6 +359,8 @@ Modified Compiler Flags
 
 - The ``-fchar8_t`` flag is no longer considered in non-C++ languages modes. (#GH55373)
 
+- The ``-fveclib=libmvec`` option now supports AArch64 targets (requires GLIBC 2.40 or newer).
+
 Removed Compiler Flags
 -------------------------
 

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 1ba52d50056e7..0ffd8c40da7da 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3479,8 +3479,9 @@ def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>,
   Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
     HelpText<"Use the given vector functions library">,
     HelpTextForVariants<[ClangOption, CC1Option],
-      "Use the given vector functions library. "
-      "Note: -fveclib={ArmPL,SLEEF} implies -fno-math-errno">,
+      "Use the given vector functions library.\n"
+      "  Note: -fveclib={ArmPL,SLEEF,libmvec} implies -fno-math-errno.\n"
+      "  Note: -fveclib=libmvec on AArch64 requires GLIBC 2.40 or newer.">,
     Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,ArmPL,AMDLIBM,none">,
     NormalizedValuesScope<"llvm::driver::VectorLibrary">,
     NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "SVML", "SLEEF",

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index bb7e5f424337b..a78a1c8978183 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5702,11 +5702,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
           Triple.getArch() != llvm::Triple::x86_64)
         D.Diag(diag::err_drv_unsupported_opt_for_target)
             << Name << Triple.getArchName();
-    } else if (Name == "libmvec" || Name == "AMDLIBM") {
+    } else if (Name == "AMDLIBM") {
       if (Triple.getArch() != llvm::Triple::x86 &&
           Triple.getArch() != llvm::Triple::x86_64)
         D.Diag(diag::err_drv_unsupported_opt_for_target)
             << Name << Triple.getArchName();
+    } else if (Name == "libmvec") {
+      if (Triple.getArch() != llvm::Triple::x86 &&
+          Triple.getArch() != llvm::Triple::x86_64 &&
+          Triple.getArch() != llvm::Triple::aarch64 &&
+          Triple.getArch() != llvm::Triple::aarch64_be)
+        D.Diag(diag::err_drv_unsupported_opt_for_target)
+            << Name << Triple.getArchName();
     } else if (Name == "SLEEF" || Name == "ArmPL") {
       if (Triple.getArch() != llvm::Triple::aarch64 &&
           Triple.getArch() != llvm::Triple::aarch64_be &&

diff  --git a/clang/test/Driver/fveclib.c b/clang/test/Driver/fveclib.c
index 5420555c36a2a..c57e9aa7a3cc2 100644
--- a/clang/test/Driver/fveclib.c
+++ b/clang/test/Driver/fveclib.c
@@ -1,6 +1,7 @@
 // RUN: %clang -### -c -fveclib=none %s 2>&1 | FileCheck --check-prefix=CHECK-NOLIB %s
 // RUN: %clang -### -c -fveclib=Accelerate %s 2>&1 | FileCheck --check-prefix=CHECK-ACCELERATE %s
 // RUN: %clang -### -c --target=x86_64-unknown-linux-gnu -fveclib=libmvec %s 2>&1 | FileCheck --check-prefix=CHECK-libmvec %s
+// RUN: %clang -### -c --target=aarch64-linux-gnu -fveclib=libmvec %s 2>&1 | FileCheck --check-prefix=CHECK-LIBMVEC-AARCH64 %s
 // RUN: %clang -### -c --target=x86_64-unknown-linux-gnu -fveclib=AMDLIBM %s 2>&1 | FileCheck --check-prefix=CHECK-AMDLIBM %s
 // RUN: %clang -### -c -fveclib=MASSV %s 2>&1 | FileCheck --check-prefix=CHECK-MASSV %s
 // RUN: %clang -### -c -fveclib=Darwin_libsystem_m %s 2>&1 | FileCheck --check-prefix=CHECK-DARWIN_LIBSYSTEM_M %s
@@ -12,6 +13,7 @@
 // CHECK-NOLIB: "-fveclib=none"
 // CHECK-ACCELERATE: "-fveclib=Accelerate"
 // CHECK-libmvec: "-fveclib=libmvec"
+// CHECK-LIBMVEC-AARCH64: "-fveclib=libmvec"
 // CHECK-AMDLIBM: "-fveclib=AMDLIBM"
 // CHECK-MASSV: "-fveclib=MASSV"
 // CHECK-DARWIN_LIBSYSTEM_M: "-fveclib=Darwin_libsystem_m"
@@ -23,7 +25,6 @@
 
 // RUN: not %clang --target=x86 -c -fveclib=SLEEF %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
 // RUN: not %clang --target=x86 -c -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
-// RUN: not %clang --target=aarch64 -c -fveclib=libmvec %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
 // RUN: not %clang --target=aarch64 -c -fveclib=SVML %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
 // RUN: not %clang --target=aarch64 -c -fveclib=AMDLIBM %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
 // CHECK-ERROR: unsupported option {{.*}} for target
@@ -43,6 +44,9 @@
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=libmvec -flto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBMVEC %s
 // CHECK-LTO-LIBMVEC: "-plugin-opt=-vector-library=LIBMVEC"
 
+// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=libmvec -flto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBMVEC-AARCH64 %s
+// CHECK-LTO-LIBMVEC-AARCH64: "-plugin-opt=-vector-library=LIBMVEC"
+
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=AMDLIBM -flto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-AMDLIBM %s
 // CHECK-LTO-AMDLIBM: "-plugin-opt=-vector-library=AMDLIBM"
 
@@ -68,6 +72,10 @@
 // CHECK-ERRNO-LIBMVEC: "-fveclib=libmvec"
 // CHECK-ERRNO-LIBMVEC-SAME: "-fmath-errno"
 
+// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=libmvec %s 2>&1 | FileCheck --check-prefix=CHECK-ERRNO-LIBMVEC-AARCH64 %s
+// CHECK-ERRNO-LIBMVEC-AARCH64: "-fveclib=libmvec"
+// CHECK-ERRNO-LIBMVEC-AARCH64-SAME: "-fmath-errno"
+
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=AMDLIBM %s 2>&1 | FileCheck --check-prefix=CHECK-ERRNO-AMDLIBM %s
 // CHECK-ERRNO-AMDLIBM: "-fveclib=AMDLIBM"
 // CHECK-ERRNO-AMDLIBM-SAME: "-fmath-errno"

diff  --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 68753a2497db2..4015df990729f 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -237,6 +237,266 @@ TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", FIXED(8), "_ZGV_LLVM_N8v")
 
+#elif defined(TLI_DEFINE_LIBMVEC_AARCH64_VECFUNCS)
+
+TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("acosf", "_ZGVnN2v_acosf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("acosf", "_ZGVnN4v_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("acos", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("acosf", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.acos.f64", "_ZGVnN2v_acos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVnN2v_acosf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVnN4v_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.acos.f64", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("acosh", "_ZGVnN2v_acosh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN2v_acoshf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN4v_acoshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("acosh", "_ZGVsMxv_acosh",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("acoshf", "_ZGVsMxv_acoshf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("asinf", "_ZGVnN2v_asinf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.asin.f64", "_ZGVnN2v_asin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVnN2v_asinf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVnN4v_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.asin.f64", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("asinh", "_ZGVnN2v_asinh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN2v_asinhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN4v_asinhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("asinh", "_ZGVsMxv_asinh",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("asinhf", "_ZGVsMxv_asinhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("atanf", "_ZGVnN2v_atanf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("atan", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("atanf", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN2v_atanf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN2vv_atan2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC)
+TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVnN2vv_atan2", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN2vv_atan2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN4vv_atan2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC)
+
+TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN2v_atanhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("cbrt", "_ZGVnN2v_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN2v_cbrtf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN4v_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("cbrt", "_ZGVsMxv_cbrt",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("cbrtf", "_ZGVsMxv_cbrtf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("cosf", "_ZGVnN2v_cosf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("cos", "_ZGVsMxv_cos",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("cosf", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN2v_cosf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("coshf", "_ZGVnN2v_coshf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("coshf", "_ZGVnN4v_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("cosh", "_ZGVsMxv_cosh",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("coshf", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_ZGVnN2v_cosh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVnN2v_coshf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVnN4v_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("erf", "_ZGVnN2v_erf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("erff", "_ZGVnN2v_erff", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("erff", "_ZGVnN4v_erff", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("erf", "_ZGVsMxv_erf",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("erff", "_ZGVsMxv_erff", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("erfc", "_ZGVnN2v_erfc", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN2v_erfcf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN4v_erfcf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("erfc", "_ZGVsMxv_erfc",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("erfcf", "_ZGVsMxv_erfcf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("exp", "_ZGVnN2v_exp", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("expf", "_ZGVnN2v_expf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("expf", "_ZGVnN4v_expf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("exp", "_ZGVsMxv_exp",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN2v_expf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("exp10", "_ZGVnN2v_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN2v_exp10f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN4v_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN2v_exp10f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("exp2", "_ZGVnN2v_exp2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN2v_exp2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN4v_exp2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN2v_exp2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("expm1", "_ZGVnN2v_expm1", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN2v_expm1f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN4v_expm1f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("expm1", "_ZGVsMxv_expm1",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("expm1f", "_ZGVsMxv_expm1f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("hypot", "_ZGVnN2vv_hypot", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN2vv_hypotf", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN4vv_hypotf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("hypot", "_ZGVsMxvv_hypot", SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC)
+TLI_DEFINE_VECFUNC("hypotf", "_ZGVsMxvv_hypotf", SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC)
+
+TLI_DEFINE_VECFUNC("log", "_ZGVnN2v_log", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("logf", "_ZGVnN2v_logf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("logf", "_ZGVnN4v_logf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("log", "_ZGVsMxv_log",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("logf", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVnN2v_log", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN2v_logf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN4v_logf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVsMxv_log", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("log10", "_ZGVnN2v_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("log10f", "_ZGVnN2v_log10f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("log10f", "_ZGVnN4v_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("log10", "_ZGVsMxv_log10",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN2v_log10f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("log1p", "_ZGVnN2v_log1p", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN2v_log1pf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN4v_log1pf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("log1p", "_ZGVsMxv_log1p",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("log1pf", "_ZGVsMxv_log1pf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("log2", "_ZGVnN2v_log2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("log2f", "_ZGVnN2v_log2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("log2f", "_ZGVnN4v_log2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("log2", "_ZGVsMxv_log2",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN2v_log2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("pow", "_ZGVnN2vv_pow", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("powf", "_ZGVnN2vv_powf", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("powf", "_ZGVnN4vv_powf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC)
+TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN2vv_powf", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC)
+
+TLI_DEFINE_VECFUNC("sin", "_ZGVnN2v_sin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("sinf", "_ZGVnN2v_sinf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("sinf", "_ZGVnN4v_sinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("sin", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN2v_sinf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN2v_sinhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_ZGVnN2v_sinh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVnN2v_sinhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVnN4v_sinhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("tanf", "_ZGVnN2v_tanf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("tan", "_ZGVsMxv_tan",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVnN2v_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN2v_tanf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN4v_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("tanh", "_ZGVnN2v_tanh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN2v_tanhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN4v_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh",  SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
+TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_ZGVnN2v_tanh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVnN2v_tanhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVnN4v_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall)
+TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC)
+TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC)
+
 #elif defined(TLI_DEFINE_MASSV_VECFUNCS)
 // IBM MASS library's vector Functions
 

diff  --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index c8b568354965d..a3ed093134390 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -1299,6 +1299,14 @@ static const VecDesc VecFuncs_LIBMVEC_X86[] = {
 #undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS
 };
 
+static const VecDesc VecFuncs_LIBMVEC_AARCH64[] = {
+#define TLI_DEFINE_LIBMVEC_AARCH64_VECFUNCS
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX, CC)               \
+  {SCAL, VEC, VF, MASK, VABI_PREFIX, CC},
+#include "llvm/Analysis/VecFuncs.def"
+#undef TLI_DEFINE_LIBMVEC_AARCH64_VECFUNCS
+};
+
 static const VecDesc VecFuncs_MASSV[] = {
 #define TLI_DEFINE_MASSV_VECFUNCS
 #include "llvm/Analysis/VecFuncs.def"
@@ -1376,6 +1384,10 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
     case llvm::Triple::x86_64:
       addVectorizableFunctions(VecFuncs_LIBMVEC_X86);
       break;
+    case llvm::Triple::aarch64:
+    case llvm::Triple::aarch64_be:
+      addVectorizableFunctions(VecFuncs_LIBMVEC_AARCH64);
+      break;
     }
     break;
   }

diff  --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec-scalable.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec-scalable.ll
new file mode 100644
index 0000000000000..1b541d1330aae
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec-scalable.ll
@@ -0,0 +1,579 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; RUN: opt -mattr=+sve -vector-library=LIBMVEC -replace-with-veclib -S < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;.
+; CHECK: @llvm.compiler.used = appending global [34 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxvv_atan2, ptr @_ZGVsMxvv_atan2f, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
+;.
+define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_ceil_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_ceil_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_ceil_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_copysign_vscale_f64(<vscale x 2 x double> %mag, <vscale x 2 x double> %sgn) {
+; CHECK-LABEL: @llvm_copysign_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[MAG:%.*]], <vscale x 2 x double> [[SGN:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> %mag, <vscale x 2 x double> %sgn)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_copysign_vscale_f32(<vscale x 4 x float> %mag, <vscale x 4 x float> %sgn) {
+; CHECK-LABEL: @llvm_copysign_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[MAG:%.*]], <vscale x 4 x float> [[SGN:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> %mag, <vscale x 4 x float> %sgn)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_cos_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_cos_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_exp_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_exp_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_exp10_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_exp10_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_exp2_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_exp2_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_fabs_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_fabs_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_fabs_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_fabs_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_floor_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_floor_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_floor_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_floor_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_fma_vscale_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c ) {
+; CHECK-LABEL: @llvm_fma_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_fma_vscale_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: @llvm_fma_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_log_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_log_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_log10_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_log10_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_log2_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_log2_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_maxnum_vscale_f64(<vscale x 2 x double> %in0, <vscale x 2 x double> %in1) {
+; CHECK-LABEL: @llvm_maxnum_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[IN0:%.*]], <vscale x 2 x double> [[IN1:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> %in0, <vscale x 2 x double> %in1)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_maxnum_vscale_f32(<vscale x 4 x float> %in0, <vscale x 4 x float> %in1) {
+; CHECK-LABEL: @llvm_maxnum_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[IN0:%.*]], <vscale x 4 x float> [[IN1:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %in0, <vscale x 4 x float> %in1)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_minnum_vscale_f64(<vscale x 2 x double> %in0, <vscale x 2 x double> %in1) {
+; CHECK-LABEL: @llvm_minnum_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[IN0:%.*]], <vscale x 2 x double> [[IN1:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> %in0, <vscale x 2 x double> %in1)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_minnum_vscale_f32(<vscale x 4 x float> %in0, <vscale x 4 x float> %in1) {
+; CHECK-LABEL: @llvm_minnum_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[IN0:%.*]], <vscale x 4 x float> [[IN1:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> %in0, <vscale x 4 x float> %in1)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_nearbyint_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_nearbyint_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_nearbyint_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_nearbyint_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow) {
+; CHECK-LABEL: @llvm_pow_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow) {
+; CHECK-LABEL: @llvm_pow_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_rint_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_rint_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_rint_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_rint_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_round_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_round_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_round_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_round_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_sin_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_sin_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_sqrt_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_sqrt_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_sqrt_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_sqrt_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_tan_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_tan_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.tan.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_tan_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_tan_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.tan.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_acos_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_acos_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.acos.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_acos_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_acos_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.acos.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_asin_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_asin_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.asin.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_asin_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_asin_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.asin.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_atan_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_atan_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.atan.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_atan_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_atan_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.atan.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_atan2_vscale_f64(<vscale x 2 x double> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: @llvm_atan2_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[X:%.*]], <vscale x 2 x double> [[Y:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.atan2.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x double> %y)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_atan2_vscale_f32(<vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: @llvm_atan2_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> [[Y:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.atan2.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x float> %y)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_cosh_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_cosh_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.cosh.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_cosh_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_cosh_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.cosh.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_sinh_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_sinh_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.sinh.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_sinh_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_sinh_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.sinh.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_tanh_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_tanh_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.tanh.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_tanh_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_tanh_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.tanh.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+
+define <vscale x 2 x double> @llvm_trunc_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_trunc_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_trunc_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_trunc_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.tan.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.tan.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float>)
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { "target-features"="+sve" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-features"="+sve" }
+;.

diff  --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll
new file mode 100644
index 0000000000000..6323d942a08e7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll
@@ -0,0 +1,577 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; RUN: opt -vector-library=LIBMVEC -replace-with-veclib -S < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;.
+; CHECK: @llvm.compiler.used = appending global [34 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2vv_atan2, ptr @_ZGVnN4vv_atan2f, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
+;.
+define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_ceil_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_ceil_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_ceil_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_copysign_f64(<2 x double> %mag, <2 x double> %sgn) {
+; CHECK-LABEL: @llvm_copysign_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> [[MAG:%.*]], <2 x double> [[SGN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sgn)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_copysign_f32(<4 x float> %mag, <4 x float> %sgn) {
+; CHECK-LABEL: @llvm_copysign_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> [[MAG:%.*]], <4 x float> [[SGN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> %mag, <4 x float> %sgn)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_cos_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_cos_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_cos(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_cos_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_cos_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_cosf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_exp_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_exp_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_exp_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_exp_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_expf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_exp10_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_exp10_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_exp2_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp2(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_exp2_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_fabs_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_fabs_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_fabs_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_fabs_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_floor_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_floor_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.floor.v2f64(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_floor_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_floor_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.floor.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_fma_f64(<2 x double> %a, <2 x double> %b, <2 x double> %c ) {
+; CHECK-LABEL: @llvm_fma_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_fma_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: @llvm_fma_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_log_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_log_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_log_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_log_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_logf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_log10_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_log10_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log10(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_log10_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_log10_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_log10f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_log2_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_log2_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log2(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_log2_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_log2_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_log2f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_maxnum_f64(<2 x double> %in0, <2 x double> %in1) {
+; CHECK-LABEL: @llvm_maxnum_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[IN0:%.*]], <2 x double> [[IN1:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> %in0, <2 x double> %in1)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_maxnum_f32(<4 x float> %in0, <4 x float> %in1) {
+; CHECK-LABEL: @llvm_maxnum_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> [[IN0:%.*]], <4 x float> [[IN1:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %in0, <4 x float> %in1)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_minnum_f64(<2 x double> %in0, <2 x double> %in1) {
+; CHECK-LABEL: @llvm_minnum_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.minnum.v2f64(<2 x double> [[IN0:%.*]], <2 x double> [[IN1:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.minnum.v2f64(<2 x double> %in0, <2 x double> %in1)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_minnum_f32(<4 x float> %in0, <4 x float> %in1) {
+; CHECK-LABEL: @llvm_minnum_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.minnum.v4f32(<4 x float> [[IN0:%.*]], <4 x float> [[IN1:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.minnum.v4f32(<4 x float> %in0, <4 x float> %in1)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_nearbyint_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_nearbyint_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_nearbyint_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_nearbyint_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.nearbyint.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
+; CHECK-LABEL: @llvm_pow_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %pow)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %pow) {
+; CHECK-LABEL: @llvm_pow_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %pow)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_rint_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_rint_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.rint.v2f64(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_rint_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_rint_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.rint.v4f32(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.rint.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_round_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_round_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.round.v2f64(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_round_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_round_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.round.v4f32(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.round.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_sin_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_sin_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_sin(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_sin_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_sin_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_sinf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_sqrt_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_sqrt_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_sqrt_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_sqrt_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_tan_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_tan_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_tan(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_tan_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_tan_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_tanf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_acos_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_acos_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_acos(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.acos.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_acos_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_acos_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_acosf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.acos.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_asin_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_asin_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_asin(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.asin.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_asin_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_asin_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_asinf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.asin.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_atan_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_atan_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_atan(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.atan.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_atan_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_atan_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_atanf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.atan.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_atan2_f64(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: @llvm_atan2_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[INX:%.*]], <2 x double> [[INY:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.atan2.v2f64(<2 x double> %x, <2 x double> %y)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_atan2_f32(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @llvm_atan2_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[INX:%.*]], <4 x float> [[INY:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %x, <4 x float> %y)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_cosh_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_cosh_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_cosh(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.cosh.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_cosh_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_cosh_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_coshf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.cosh.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_sinh_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_sinh_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_sinh(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.sinh.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_sinh_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_sinh_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.sinh.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_tanh_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_tanh_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_tanh(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.tanh.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_tanh_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_tanh_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.tanh.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <2 x double> @llvm_trunc_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_trunc_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> [[IN:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_trunc_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_trunc_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.trunc.v4f32(<4 x float> [[IN:%.*]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.trunc.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
+declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.cos.v2f64(<2 x double>)
+declare <4 x float> @llvm.cos.v4f32(<4 x float>)
+declare <2 x double> @llvm.exp.v2f64(<2 x double>)
+declare <4 x float> @llvm.exp.v4f32(<4 x float>)
+declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
+declare <2 x double> @llvm.exp10.v2f64(<2 x double>)
+declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+declare <4 x float> @llvm.floor.v4f32(<4 x float>)
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.log.v2f64(<2 x double>)
+declare <4 x float> @llvm.log.v4f32(<4 x float>)
+declare <2 x double> @llvm.log10.v2f64(<2 x double>)
+declare <4 x float> @llvm.log10.v4f32(<4 x float>)
+declare <2 x double> @llvm.log2.v2f64(<2 x double>)
+declare <4 x float> @llvm.log2.v4f32(<4 x float>)
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+declare <4 x float> @llvm.rint.v4f32(<4 x float>)
+declare <2 x double> @llvm.round.v2f64(<2 x double>)
+declare <4 x float> @llvm.round.v4f32(<4 x float>)
+declare <2 x double> @llvm.sin.v2f64(<2 x double>)
+declare <4 x float> @llvm.sin.v4f32(<4 x float>)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+declare <2 x double> @llvm.tan.v2f64(<2 x double>)
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
index c6ea44bb85f11..670b08987c81e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
@@ -1,4 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --filter "call.*(cos|sin|tan|cbrt|erf|exp[^e]|gamma|log|sqrt|copysign|dim|min|mod|hypot|nextafter|pow|fma)" --version 2
+; RUN: opt -mattr=+neon -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s -check-prefix=LIBMVEC-NEON
+; RUN: opt -mattr=+neon -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s -check-prefix=LIBMVEC-NEON-WIDTH-2
+; RUN: opt -mattr=+sve -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -mcpu=neoverse-v1 -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s -check-prefix=LIBMVEC-SVE
 ; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s -check-prefix=SLEEF-NEON
 ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -mcpu=neoverse-v1 -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s -check-prefix=SLEEF-SVE
 ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -mcpu=neoverse-v1 -force-vector-interleave=1 -S < %s | FileCheck %s -check-prefixes=SLEEF-SVE-NOPRED
@@ -19,6 +22,18 @@ declare double @acos(double)
 declare float @acosf(float)
 
 define void @acos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @acos_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acos_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @acos_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @acos_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]])
@@ -64,6 +79,18 @@ define void @acos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @acos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @acos_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acos_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_acosf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @acos_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @acos_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -112,6 +139,18 @@ declare double @acosh(double)
 declare float @acoshf(float)
 
 define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @acosh_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acosh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acosh_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acosh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @acosh_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acosh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @acosh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_acosh(<2 x double> [[WIDE_LOAD:%.*]])
@@ -157,6 +196,18 @@ define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @acosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @acosh_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_acoshf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acosh_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_acoshf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @acosh_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acoshf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @acosh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_acoshf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -205,6 +256,18 @@ declare double @asin(double)
 declare float @asinf(float)
 
 define void @asin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @asin_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asin_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @asin_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @asin_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]])
@@ -250,6 +313,18 @@ define void @asin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @asin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @asin_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asin_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_asinf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @asin_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @asin_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -298,6 +373,18 @@ declare double @asinh(double)
 declare float @asinhf(float)
 
 define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @asinh_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asinh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asinh_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asinh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @asinh_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asinh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @asinh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_asinh(<2 x double> [[WIDE_LOAD:%.*]])
@@ -343,6 +430,18 @@ define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @asinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @asinh_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinhf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asinh_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_asinhf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @asinh_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @asinh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_asinhf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -391,6 +490,18 @@ declare double @atan(double)
 declare float @atanf(float)
 
 define void @atan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @atan_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @atan_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @atan_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]])
@@ -436,6 +547,18 @@ define void @atan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @atan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @atan_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_atanf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @atan_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @atan_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -484,6 +607,18 @@ declare double @atan2(double, double)
 declare float @atan2f(float, float)
 
 define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @atan2_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan2_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @atan2_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @atan2_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -529,6 +664,18 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @atan2_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan2_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2vv_atan2f(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @atan2_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @atan2_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -577,6 +724,18 @@ declare double @atanh(double)
 declare float @atanhf(float)
 
 define void @atanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @atanh_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atanh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atanh_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atanh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @atanh_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atanh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @atanh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_atanh(<2 x double> [[WIDE_LOAD:%.*]])
@@ -622,6 +781,18 @@ define void @atanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @atanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @atanh_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atanh_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_atanhf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @atanh_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @atanh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -670,6 +841,18 @@ declare double @cbrt(double)
 declare float @cbrtf(float)
 
 define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cbrt_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cbrt(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cbrt_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cbrt(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cbrt_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cbrt(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cbrt_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cbrt(<2 x double> [[WIDE_LOAD:%.*]])
@@ -715,6 +898,18 @@ define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @cbrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cbrt_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_cbrtf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cbrt_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_cbrtf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cbrt_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cbrtf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cbrt_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cbrtf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -763,6 +958,18 @@ declare double @copysign(double, double)
 declare float @copysignf(float, float)
 
 define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @copysign_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @copysign_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @copysign_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @copysign_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_copysign(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -808,6 +1015,18 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @copysign_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @copysign_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @copysign_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @copysign_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_copysignf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -856,6 +1075,18 @@ declare double @cos(double)
 declare float @cosf(float)
 
 define void @cos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cos_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cos_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cos_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cos_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
@@ -901,6 +1132,18 @@ define void @cos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @cos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cos_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cos_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_cosf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cos_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cos_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -949,6 +1192,18 @@ declare double @cosh(double)
 declare float @coshf(float)
 
 define void @cosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cosh_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cosh_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cosh_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cosh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]])
@@ -994,6 +1249,18 @@ define void @cosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @cosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cosh_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_coshf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cosh_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_coshf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cosh_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cosh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1042,6 +1309,18 @@ declare double @cospi(double)
 declare float @cospif(float)
 
 define void @cospi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cospi_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cospi_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cospi_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cospi_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cospi(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1087,6 +1366,18 @@ define void @cospi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @cospi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cospi_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cospi_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cospi_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cospi_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cospif(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1135,6 +1426,18 @@ declare double @erf(double)
 declare float @erff(float)
 
 define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @erf_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_erf(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @erf_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_erf(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @erf_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_erf(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @erf_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_erf(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1180,6 +1483,18 @@ define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @erf_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @erf_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_erff(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @erf_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_erff(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @erf_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_erff(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @erf_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_erff(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1228,6 +1543,18 @@ declare double @erfc(double)
 declare float @erfcf(float)
 
 define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @erfc_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_erfc(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @erfc_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_erfc(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @erfc_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_erfc(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @erfc_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_erfc(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1273,6 +1600,18 @@ define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @erfc_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @erfc_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_erfcf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @erfc_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_erfcf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @erfc_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_erfcf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @erfc_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_erfcf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1321,6 +1660,18 @@ declare double @exp(double)
 declare float @expf(float)
 
 define void @exp_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1366,6 +1717,18 @@ define void @exp_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @exp_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_expf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1414,6 +1777,18 @@ declare double @exp10(double)
 declare float @exp10f(float)
 
 define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp10_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp10_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp10_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp10_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1459,6 +1834,18 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp10_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp10_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_exp10f(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp10_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp10_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1507,6 +1894,18 @@ declare double @exp2(double)
 declare float @exp2f(float)
 
 define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp2_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp2_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp2_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp2_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1552,6 +1951,18 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp2_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp2_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_exp2f(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp2_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp2_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1600,6 +2011,18 @@ declare double @expm1(double)
 declare float @expm1f(float)
 
 define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @expm1_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_expm1(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @expm1_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_expm1(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @expm1_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_expm1(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @expm1_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_expm1(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1645,6 +2068,18 @@ define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @expm1_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @expm1_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_expm1f(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @expm1_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_expm1f(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @expm1_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expm1f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @expm1_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_expm1f(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1693,6 +2128,18 @@ declare double @fdim(double, double)
 declare float @fdimf(float, float)
 
 define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fdim_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fdim_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @fdim_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @fdim_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_fdim(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -1738,6 +2185,18 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fdim_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fdim_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @fdim_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @fdim_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_fdimf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -1786,6 +2245,18 @@ declare double @fma(double, double, double)
 declare float @fmaf(float, float, float)
 
 define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fma_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fma_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @fma_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @fma_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vvv_fma(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
@@ -1831,6 +2302,18 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fma_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fma_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @fma_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @fma_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vvv_fmaf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
@@ -1879,6 +2362,18 @@ declare double @fmax(double, double)
 declare float @fmaxf(float, float)
 
 define void @fmax_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fmax_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmax_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @fmax_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @fmax_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_fmax(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -1924,6 +2419,18 @@ define void @fmax_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @fmax_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fmax_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmax_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @fmax_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @fmax_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_fmaxf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -1972,6 +2479,18 @@ declare double @fmin(double, double)
 declare float @fminf(float, float)
 
 define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fmin_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmin_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @fmin_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @fmin_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_fmin(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -2017,6 +2536,18 @@ define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fmin_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmin_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @fmin_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @fmin_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_fminf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -2065,6 +2596,18 @@ declare double @fmod(double, double)
 declare float @fmodf(float, float)
 
 define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fmod_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmod_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @fmod_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @fmod_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_fmod(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -2110,6 +2653,18 @@ define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fmod_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmod_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @fmod_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @fmod_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_fmodf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -2158,6 +2713,18 @@ declare double @hypot(double, double)
 declare float @hypotf(float, float)
 
 define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @hypot_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_hypot(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @hypot_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_hypot(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @hypot_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_hypot(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @hypot_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_hypot(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -2203,6 +2770,18 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @hypot_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_hypotf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @hypot_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2vv_hypotf(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @hypot_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_hypotf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @hypot_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_hypotf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -2251,6 +2830,18 @@ declare i32 @ilogb(double)
 declare i32 @ilogbf(float)
 
 define void @ilogb_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @ilogb_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ilogb_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @ilogb_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @ilogb_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x i32> @_ZGVnN2v_ilogb(<2 x double> [[WIDE_LOAD:%.*]])
@@ -2296,6 +2887,18 @@ define void @ilogb_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @ilogb_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @ilogb_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ilogb_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @ilogb_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @ilogb_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x i32> @_ZGVnN4v_ilogbf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -2344,6 +2947,18 @@ declare double @ldexp(double, i32)
 declare float @ldexpf(float, i32)
 
 define void @ldexp_f64(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @ldexp_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ldexp_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @ldexp_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @ldexp_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP4:%.*]] = call <2 x double> @_ZGVnN2vv_ldexp(<2 x double> [[WIDE_LOAD:%.*]], <2 x i32> [[WIDE_LOAD1:%.*]])
@@ -2391,6 +3006,18 @@ define void @ldexp_f64(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %
 }
 
 define void @ldexp_f32(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @ldexp_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ldexp_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @ldexp_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @ldexp_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP4:%.*]] = call <4 x float> @_ZGVnN4vv_ldexpf(<4 x float> [[WIDE_LOAD:%.*]], <4 x i32> [[WIDE_LOAD1:%.*]])
@@ -2441,6 +3068,18 @@ declare double @lgamma(double)
 declare float @lgammaf(float)
 
 define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @lgamma_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @lgamma(double [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @lgamma_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @lgamma(double [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @lgamma_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @lgamma(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @lgamma_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[WIDE_LOAD:%.*]])
@@ -2486,6 +3125,18 @@ define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @lgamma_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @lgamma_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @lgamma_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @lgamma_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -2534,6 +3185,18 @@ declare double @log(double)
 declare float @logf(float)
 
 define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
@@ -2579,6 +3242,18 @@ define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_logf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -2627,6 +3302,18 @@ declare double @log10(double)
 declare float @log10f(float)
 
 define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log10_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log10_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log10_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log10_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
@@ -2672,6 +3359,18 @@ define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log10_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log10_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_log10f(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log10_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log10_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
@@ -2720,6 +3419,18 @@ declare double @log1p(double)
 declare float @log1pf(float)
 
 define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log1p_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log1p(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log1p_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log1p(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log1p_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log1p(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log1p_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log1p(<2 x double> [[WIDE_LOAD:%.*]])
@@ -2765,6 +3476,18 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log1p_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log1pf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log1p_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_log1pf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log1p_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log1pf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log1p_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log1pf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -2813,6 +3536,18 @@ declare double @log2(double)
 declare float @log2f(float)
 
 define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log2_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log2_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log2_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log2_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
@@ -2858,6 +3593,18 @@ define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log2_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log2_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_log2f(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log2_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log2_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
@@ -2909,6 +3656,18 @@ declare double @modf(double, ptr)
 declare float @modff(float, ptr)
 
 define void @modf_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; LIBMVEC-NEON-LABEL: define void @modf_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @modf_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @modf_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @modf_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]])
@@ -2953,6 +3712,18 @@ for.cond.cleanup:
 }
 
 define void @modf_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; LIBMVEC-NEON-LABEL: define void @modf_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @modf_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @modf_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @modf_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]])
@@ -3000,6 +3771,18 @@ declare double @nextafter(double, double)
 declare float @nextafterf(float, float)
 
 define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @nextafter_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @nextafter_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @nextafter_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @nextafter_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_nextafter(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -3045,6 +3828,18 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @nextafter_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @nextafter_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]])
+;
+; LIBMVEC-SVE-LABEL: define void @nextafter_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @nextafter_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_nextafterf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -3093,6 +3888,18 @@ declare double @pow(double, double)
 declare float @powf(float, float)
 
 define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @pow_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @pow_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @pow_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @pow_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -3138,6 +3945,18 @@ define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @pow_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @pow_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2vv_powf(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @pow_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @pow_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -3186,6 +4005,18 @@ declare double @sin(double)
 declare float @sinf(float)
 
 define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sin_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sin_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sin_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sin_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
@@ -3231,6 +4062,18 @@ define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sin_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sin_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_sinf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sin_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sin_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -3282,6 +4125,18 @@ declare void @sincos(double, ptr, ptr)
 declare void @sincosf(float, ptr, ptr)
 
 define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; LIBMVEC-NEON-LABEL: define void @sincos_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sincos_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sincos_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sincos_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
@@ -3325,6 +4180,18 @@ for.cond.cleanup:
 }
 
 define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; LIBMVEC-NEON-LABEL: define void @sincos_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sincos_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sincos_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sincos_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
@@ -3374,6 +4241,18 @@ declare void @sincospi(double, ptr, ptr)
 declare void @sincospif(float, ptr, ptr)
 
 define void @sincospi_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; LIBMVEC-NEON-LABEL: define void @sincospi_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sincospi_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sincospi_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sincospi_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
@@ -3417,6 +4296,18 @@ for.cond.cleanup:
 }
 
 define void @sincospi_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; LIBMVEC-NEON-LABEL: define void @sincospi_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sincospi_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sincospi_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sincospi_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]])
@@ -3463,6 +4354,18 @@ declare double @sinh(double)
 declare float @sinhf(float)
 
 define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sinh_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinh_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sinh_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sinh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]])
@@ -3508,6 +4411,18 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sinh_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinh_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_sinhf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sinh_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sinh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -3556,6 +4471,18 @@ declare double @sinpi(double)
 declare float @sinpif(float)
 
 define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sinpi_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinpi_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sinpi_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sinpi_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sinpi(<2 x double> [[WIDE_LOAD:%.*]])
@@ -3601,6 +4528,18 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sinpi_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinpi_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sinpi_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sinpi_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinpif(<4 x float> [[WIDE_LOAD:%.*]])
@@ -3649,6 +4588,18 @@ declare double @sqrt(double)
 declare float @sqrtf(float)
 
 define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sqrt_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sqrt_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sqrt_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sqrt_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[WIDE_LOAD:%.*]])
@@ -3694,6 +4645,18 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sqrt_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sqrt_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sqrt_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sqrt_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -3742,6 +4705,18 @@ declare double @tan(double)
 declare float @tanf(float)
 
 define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @tan_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tan_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @tan_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @tan_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]])
@@ -3787,6 +4762,18 @@ define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @tan_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tan_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_tanf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @tan_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @tan_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -3835,6 +4822,18 @@ declare double @tanh(double)
 declare float @tanhf(float)
 
 define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @tanh_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tanh_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @tanh_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @tanh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]])
@@ -3880,6 +4879,18 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @tanh_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tanh_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_tanhf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @tanh_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @tanh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -3928,6 +4939,18 @@ declare double @tgamma(double)
 declare float @tgammaf(float)
 
 define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @tgamma_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tgamma_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @tgamma_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @tgamma_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[WIDE_LOAD:%.*]])
@@ -3973,6 +4996,18 @@ define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
 }
 
 define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @tgamma_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON:    [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tgamma_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-NEON-WIDTH-2:    [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @tgamma_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @tgamma_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[WIDE_LOAD:%.*]])

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
index f753df32d9ebc..f6f2e39594dd8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --filter "call.*(acos|asin|atan|atan2|cos|cosh|exp|log|sin|sinh|pow|ceil|copysign|fabs|floor|fma|m..num|nearbyint|rint|round|sqrt|tan|tanh|trunc)" --version 2
 
+; RUN: opt -mattr=+neon -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=LIBMVEC-NEON
+; RUN: opt -mattr=+neon -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s --check-prefix=LIBMVEC-NEON-WIDTH-2
+; RUN: opt -mattr=+sve -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=LIBMVEC-SVE
 ; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SLEEF-NEON
 ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=SLEEF-SVE
 ; RUN: opt -mattr=+neon -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=ARMPL-NEON
@@ -16,6 +19,19 @@ declare double @llvm.acos.f64(double)
 declare float @llvm.acos.f32(float)
 
 define void @acos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @acos_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acos_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @acos_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.acos.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @acos_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]])
@@ -51,6 +67,19 @@ define void @acos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @acos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @acos_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acos_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_acosf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @acos_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.acos.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @acos_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -89,6 +118,19 @@ declare double @llvm.asin.f64(double)
 declare float @llvm.asin.f32(float)
 
 define void @asin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @asin_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asin_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @asin_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.asin.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @asin_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]])
@@ -124,6 +166,19 @@ define void @asin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @asin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @asin_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asin_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_asinf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @asin_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.asin.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @asin_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -162,6 +217,19 @@ declare double @llvm.atan.f64(double)
 declare float @llvm.atan.f32(float)
 
 define void @atan_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @atan_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @atan_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.atan.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @atan_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]])
@@ -197,6 +265,19 @@ define void @atan_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @atan_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @atan_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_atanf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @atan_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.atan.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @atan_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -235,6 +316,19 @@ declare double @llvm.atan2.f64(double, double)
 declare float @llvm.atan2.f32(float, float)
 
 define void @atan2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @atan2_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan2_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @atan2_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.atan2.f64(double [[IN:%.*]], double [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @atan2_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -270,6 +364,19 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @atan2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @atan2_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan2_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2vv_atan2f(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @atan2_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.atan2.f32(float [[IN:%.*]], float [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @atan2_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -308,6 +415,18 @@ declare double @llvm.ceil.f64(double)
 declare float @llvm.ceil.f32(float)
 
 define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @ceil_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ceil_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @ceil_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @ceil_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
@@ -343,6 +462,18 @@ define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @ceil_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ceil_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @ceil_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @ceil_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
@@ -381,6 +512,19 @@ declare double @llvm.copysign.f64(double, double)
 declare float @llvm.copysign.f32(float, float)
 
 define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @copysign_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @copysign_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @copysign_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]])
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.copysign.f64(double [[IN:%.*]], double [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @copysign_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -416,6 +560,19 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @copysign_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @copysign_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @copysign_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]])
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.copysign.f32(float [[IN:%.*]], float [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @copysign_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -454,6 +611,19 @@ declare double @llvm.cos.f64(double)
 declare float @llvm.cos.f32(float)
 
 define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cos_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cos_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cos_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cos_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
@@ -489,6 +659,19 @@ define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cos_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cos_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_cosf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cos_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cos_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -527,6 +710,19 @@ declare double @llvm.cosh.f64(double)
 declare float @llvm.cosh.f32(float)
 
 define void @cosh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cosh_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cosh_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cosh_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.cosh.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cosh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]])
@@ -562,6 +758,19 @@ define void @cosh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @cosh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @cosh_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_coshf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cosh_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_coshf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @cosh_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.cosh.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @cosh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -600,6 +809,19 @@ declare double @llvm.exp.f64(double)
 declare float @llvm.exp.f32(float)
 
 define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
@@ -635,6 +857,19 @@ define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_expf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -673,6 +908,19 @@ declare double @llvm.exp10.f64(double)
 declare float @llvm.exp10.f32(float)
 
 define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp10_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp10_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp10_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp10_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
@@ -708,6 +956,19 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp10_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp10_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_exp10f(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp10_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp10_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
@@ -746,6 +1007,19 @@ declare double @llvm.exp2.f64(double)
 declare float @llvm.exp2.f32(float)
 
 define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp2_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp2_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp2_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp2_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
@@ -781,6 +1055,19 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @exp2_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp2_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_exp2f(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @exp2_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @exp2_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
@@ -819,6 +1106,18 @@ declare double @llvm.fabs.f64(double)
 declare float @llvm.fabs.f32(float)
 
 define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fabs_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fabs_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @fabs_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @fabs_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
@@ -854,6 +1153,18 @@ define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fabs_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fabs_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @fabs_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @fabs_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
@@ -892,6 +1203,18 @@ declare double @llvm.floor.f64(double)
 declare float @llvm.floor.f32(float)
 
 define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @floor_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @floor_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @floor_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @floor_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
@@ -927,6 +1250,18 @@ define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @floor_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @floor_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @floor_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @floor_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
@@ -965,6 +1300,18 @@ declare double @llvm.fma.f64(double, double, double)
 declare float @llvm.fma.f32(float, float, float)
 
 define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fma_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fma_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @fma_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
+;
 ; SLEEF-NEON-LABEL: define void @fma_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
@@ -1000,6 +1347,18 @@ define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @fma_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fma_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]], <2 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @fma_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
+;
 ; SLEEF-NEON-LABEL: define void @fma_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
@@ -1038,6 +1397,19 @@ declare double @llvm.log.f64(double)
 declare float @llvm.log.f32(float)
 
 define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.log.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1073,6 +1445,19 @@ define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_logf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.log.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1111,6 +1496,19 @@ declare double @llvm.log10.f64(double)
 declare float @llvm.log10.f32(float)
 
 define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log10_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log10_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log10_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log10_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1146,6 +1544,19 @@ define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log10_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log10_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_log10f(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log10_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log10_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1184,6 +1595,19 @@ declare double @llvm.log2.f64(double)
 declare float @llvm.log2.f32(float)
 
 define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log2_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log2_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log2_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log2_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1219,6 +1643,19 @@ define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @log2_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log2_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_log2f(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @log2_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @log2_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1257,6 +1694,18 @@ declare double @llvm.maxnum.f64(double, double)
 declare float @llvm.maxnum.f32(float, float)
 
 define void @maxnum_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @maxnum_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @maxnum_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @maxnum_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
 ; SLEEF-NEON-LABEL: define void @maxnum_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -1292,6 +1741,18 @@ define void @maxnum_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @maxnum_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @maxnum_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @maxnum_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @maxnum_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
 ; SLEEF-NEON-LABEL: define void @maxnum_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -1330,6 +1791,18 @@ declare double @llvm.minnum.f64(double, double)
 declare float @llvm.minnum.f32(float, float)
 
 define void @minnum_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @minnum_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @minnum_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @minnum_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
 ; SLEEF-NEON-LABEL: define void @minnum_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -1365,6 +1838,18 @@ define void @minnum_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @minnum_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @minnum_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @minnum_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.minnum.v2f32(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @minnum_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
 ; SLEEF-NEON-LABEL: define void @minnum_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -1403,6 +1888,18 @@ declare double @llvm.nearbyint.f64(double)
 declare float @llvm.nearbyint.f32(float)
 
 define void @nearbyint_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @nearbyint_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @nearbyint_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @nearbyint_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @nearbyint_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1438,6 +1935,18 @@ define void @nearbyint_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @nearbyint_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @nearbyint_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @nearbyint_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @nearbyint_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @nearbyint_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1476,6 +1985,19 @@ declare double @llvm.pow.f64(double, double)
 declare float @llvm.pow.f32(float, float)
 
 define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @pow_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @pow_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @pow_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.pow.f64(double [[IN:%.*]], double [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @pow_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
@@ -1511,6 +2033,19 @@ define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @pow_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @pow_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2vv_powf(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]])
+;
+; LIBMVEC-SVE-LABEL: define void @pow_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.pow.f32(float [[IN:%.*]], float [[IN]])
+;
 ; SLEEF-NEON-LABEL: define void @pow_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
@@ -1549,6 +2084,18 @@ declare double @llvm.rint.f64(double)
 declare float @llvm.rint.f32(float)
 
 define void @rint_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @rint_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @rint_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @rint_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @rint_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1584,6 +2131,18 @@ define void @rint_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @rint_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @rint_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @rint_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @rint_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @rint_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1622,6 +2181,18 @@ declare double @llvm.round.f64(double)
 declare float @llvm.round.f32(float)
 
 define void @round_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @round_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @round_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @round_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @round_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1657,6 +2228,18 @@ define void @round_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @round_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @round_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @round_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @round_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @round_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1695,6 +2278,19 @@ declare double @llvm.sin.f64(double)
 declare float @llvm.sin.f32(float)
 
 define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sin_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sin_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sin_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sin_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1730,6 +2326,19 @@ define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sin_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sin_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_sinf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sin_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sin_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1768,6 +2377,19 @@ declare double @llvm.sinh.f64(double)
 declare float @llvm.sinh.f32(float)
 
 define void @sinh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sinh_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinh_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sinh_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.sinh.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sinh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1803,6 +2425,19 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @sinh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sinh_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinh_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_sinhf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sinh_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.sinh.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sinh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1841,6 +2476,18 @@ declare double @llvm.sqrt.f64(double)
 declare float @llvm.sqrt.f32(float)
 
 define void @sqrt_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sqrt_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sqrt_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sqrt_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sqrt_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1876,6 +2523,18 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @sqrt_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @sqrt_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sqrt_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @sqrt_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @sqrt_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1914,6 +2573,19 @@ declare double @llvm.tan.f64(double)
 declare float @llvm.tan.f32(float)
 
 define void @tan_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @tan_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tan_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @tan_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.tan.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @tan_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]])
@@ -1949,6 +2621,19 @@ define void @tan_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @tan_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @tan_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tan_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_tanf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @tan_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.tan.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @tan_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -1987,6 +2672,19 @@ declare double @llvm.tanh.f64(double)
 declare float @llvm.tanh.f32(float)
 
 define void @tanh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @tanh_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tanh_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @tanh_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call double @llvm.tanh.f64(double [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @tanh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]])
@@ -2022,6 +2720,19 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @tanh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @tanh_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tanh_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_tanhf(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @tanh_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
+; LIBMVEC-SVE:    [[CALL:%.*]] = tail call float @llvm.tanh.f32(float [[IN:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @tanh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]])
@@ -2060,6 +2771,18 @@ declare double @llvm.trunc.f64(double)
 declare float @llvm.trunc.f32(float)
 
 define void @trunc_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @trunc_f64
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @trunc_f64
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @trunc_f64
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @trunc_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
@@ -2095,6 +2818,18 @@ define void @trunc_f64(ptr noalias %in.ptr, ptr %out.ptr) {
 }
 
 define void @trunc_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; LIBMVEC-NEON-LABEL: define void @trunc_f32
+; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @trunc_f32
+; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-NEON-WIDTH-2:    [[TMP2:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> [[WIDE_LOAD:%.*]])
+;
+; LIBMVEC-SVE-LABEL: define void @trunc_f32
+; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; LIBMVEC-SVE:    [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
 ; SLEEF-NEON-LABEL: define void @trunc_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
 ; SLEEF-NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]])

diff  --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll
index a1f660d31668e..5459512239b64 100644
--- a/llvm/test/Transforms/Util/add-TLI-mappings.ll
+++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll
@@ -1,15 +1,13 @@
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=SVML -passes=inject-tli-mappings -S < %s | FileCheck %s  --check-prefixes=COMMON,SVML
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=AMDLIBM -passes=inject-tli-mappings -S < %s | FileCheck %s  --check-prefixes=COMMON,AMDLIBM
 ; RUN: opt -mtriple=powerpc64-unknown-linux-gnu -vector-library=MASSV -passes=inject-tli-mappings -S < %s | FileCheck %s  --check-prefixes=COMMON,MASSV
-; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=LIBMVEC -passes=inject-tli-mappings -S < %s | FileCheck %s  --check-prefixes=LIBMVEC-AARCH64
+; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=LIBMVEC -passes=inject-tli-mappings -S < %s | FileCheck %s  --check-prefixes=COMMON,LIBMVEC-AARCH64
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=LIBMVEC -passes=inject-tli-mappings -S < %s | FileCheck %s  --check-prefixes=COMMON,LIBMVEC-X86
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=Accelerate -passes=inject-tli-mappings -S < %s | FileCheck %s  --check-prefixes=COMMON,ACCELERATE
 ; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=sleefgnuabi -passes=inject-tli-mappings -S < %s | FileCheck %s  --check-prefixes=COMMON,SLEEFGNUABI
 ; RUN: opt -mtriple=riscv64-unknown-linux-gnu -vector-library=sleefgnuabi -passes=inject-tli-mappings -S < %s | FileCheck %s  --check-prefixes=COMMON,SLEEFGNUABI_RISCV
 ; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=ArmPL -passes=inject-tli-mappings -S < %s | FileCheck %s  --check-prefixes=COMMON,ARMPL
 
-; LIBMVEC-AARCH64-NOT: llvm.compiler.used
-
 ; COMMON-LABEL: @llvm.compiler.used = appending global
 ; SVML-SAME:        [6 x ptr] [
 ; SVML-SAME:          ptr @__svml_sin2,
@@ -35,6 +33,12 @@
 ; MASSV-SAME:         ptr @__log10f4
 ; ACCELERATE-SAME:  [1 x ptr] [
 ; ACCELERATE-SAME:    ptr @vlog10f
+; LIBMVEC-AARCH64-SAME: [5 x ptr] [
+; LIBMVEC-AARCH64-SAME:   ptr @_ZGVnN2v_sin,
+; LIBMVEC-AARCH64-SAME:   ptr @_ZGVsMxv_sin,
+; LIBMVEC-AARCH64-SAME:   ptr @_ZGVnN2v_log10f,
+; LIBMVEC-AARCH64-SAME:   ptr @_ZGVnN4v_log10f,
+; LIBMVEC-AARCH64-SAME:   ptr @_ZGVsMxv_log10f
 ; LIBMVEC-X86-SAME: [2 x ptr] [
 ; LIBMVEC-X86-SAME:   ptr @_ZGVbN2v_sin,
 ; LIBMVEC-X86-SAME:   ptr @_ZGVdN4v_sin
@@ -100,6 +104,7 @@ define double @sin_f64(double %in) {
 ; AMDLIBM:            call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
 ; MASSV:              call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
 ; ACCELERATE:         call double @sin(double %{{.*}})
+; LIBMVEC-AARCH64:    call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
 ; LIBMVEC-X86:        call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
 ; SLEEFGNUABI:        call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
 ; SLEEFGNUABI_RISCV:  call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
@@ -158,6 +163,7 @@ define float @call_llvm.log10.f32(float %in) {
 ; COMMON-LABEL:       @call_llvm.log10.f32(
 ; SVML:               call float @llvm.log10.f32(float %{{.*}})
 ; AMDLIBM:            call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
+; LIBMVEC-AARCH64:    call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
 ; LIBMVEC-X86:        call float @llvm.log10.f32(float %{{.*}})
 ; MASSV:              call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
 ; ACCELERATE:         call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
@@ -167,6 +173,7 @@ define float @call_llvm.log10.f32(float %in) {
 ; No mapping of "llvm.log10.f32" to a vector function for SVML.
 ; SVML-NOT:        _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
 ; AMDLIBM-NOT:        _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
+; LIBMVEC-AARCH64-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
 ; LIBMVEC-X86-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
   %call = tail call float @llvm.log10.f32(float %in)
   ret float %call
@@ -196,8 +203,11 @@ declare float @llvm.log10.f32(float) #0
 ; MASSV: declare <2 x double> @__sind2(<2 x double>)
 ; MASSV: declare <4 x float> @__log10f4(<4 x float>)
 
-; LIBMVEC-AARCH64-NOT: declare <2 x double> @_ZGVbN2v_sin(<2 x double>)
-; LIBMVEC-AARCH64-NOT: declare <4 x double> @_ZGVdN4v_sin(<4 x double>)
+; LIBMVEC-AARCH64: declare aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double>)
+; LIBMVEC-AARCH64: declare <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double>, <vscale x 2 x i1>)
+; LIBMVEC-AARCH64: declare aarch64_vector_pcs <2 x float> @_ZGVnN2v_log10f(<2 x float>)
+; LIBMVEC-AARCH64: declare aarch64_vector_pcs <4 x float> @_ZGVnN4v_log10f(<4 x float>)
+; LIBMVEC-AARCH64: declare <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float>, <vscale x 4 x i1>)
 
 ; LIBMVEC-X86: declare <2 x double> @_ZGVbN2v_sin(<2 x double>)
 ; LIBMVEC-X86: declare <4 x double> @_ZGVdN4v_sin(<4 x double>)
@@ -272,6 +282,14 @@ attributes #0 = { nounwind readnone }
 ; ACCELERATE:      attributes #[[LOG10]] = { "vector-function-abi-variant"=
 ; ACCELERATE-SAME:   "_ZGV_LLVM_N4v_llvm.log10.f32(vlog10f)" }
 
+; LIBMVEC-AARCH64:      attributes #[[SIN]] = { "vector-function-abi-variant"=
+; LIBMVEC-AARCH64-SAME:   "_ZGV_LLVM_N2v_sin(_ZGVnN2v_sin),
+; LIBMVEC-AARCH64-SAME:   _ZGVsMxv_sin(_ZGVsMxv_sin)" }
+; LIBMVEC-AARCH64:      attributes #[[LOG10]] = { "vector-function-abi-variant"=
+; LIBMVEC-AARCH64-SAME:   "_ZGV_LLVM_N2v_llvm.log10.f32(_ZGVnN2v_log10f),
+; LIBMVEC-AARCH64-SAME:   _ZGV_LLVM_N4v_llvm.log10.f32(_ZGVnN4v_log10f),
+; LIBMVEC-AARCH64-SAME:   _ZGVsMxv_llvm.log10.f32(_ZGVsMxv_log10f)" }
+
 ; LIBMVEC-X86:      attributes #[[SIN]] = { "vector-function-abi-variant"=
 ; LIBMVEC-X86-SAME:   "_ZGV_LLVM_N2v_sin(_ZGVbN2v_sin),
 ; LIBMVEC-X86-SAME:   _ZGV_LLVM_N4v_sin(_ZGVdN4v_sin)" }