[llvm] [TLI][NFC] Fix ordering of ArmPL and SLEEF tests (PR #77609)

Maciej Gabka via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 10 05:43:42 PST 2024


https://github.com/mgabka created https://github.com/llvm/llvm-project/pull/77609

This patch sorts the tests that check whether the SLEEF and ArmPL mappings are used so that they follow the order of the math functions' base names.
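
For illustration, this is the base-name order the intrinsic test groups in replace-with-veclib-armpl.ll end up in after the change. This is an abridged outline reconstructed from the hunks below, not part of the patch; only the fixed-width f64 declarations are shown:

; Illustrative outline only, reconstructed from the diff below.
declare <2 x double> @llvm.cos.v2f64(<2 x double>)                 ; cos
declare <2 x double> @llvm.exp.v2f64(<2 x double>)                 ; exp
declare <2 x double> @llvm.exp10.v2f64(<2 x double>)               ; exp10
declare <2 x double> @llvm.exp2.v2f64(<2 x double>)                ; exp2
declare <2 x double> @llvm.log.v2f64(<2 x double>)                 ; log
declare <2 x double> @llvm.log10.v2f64(<2 x double>)               ; log10
declare <2 x double> @llvm.log2.v2f64(<2 x double>)                ; log2
declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)   ; pow
declare <2 x double> @llvm.sin.v2f64(<2 x double>)                 ; sin
; frem (fmod mapping) tests stay at the end of the file, as before.

Note that exp10 and log10 precede exp2 and log2 because the base names are compared as strings, so "10" sorts before "2".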

From 9be744df0ada146f2aefe03ff04c7c514dcfe258 Mon Sep 17 00:00:00 2001
From: Maciej Gabka <maciej.gabka at arm.com>
Date: Wed, 10 Jan 2024 10:03:59 +0000
Subject: [PATCH] [TLI][NFC] Fix ordering of ArmPL and SLEEF tests

This patch sorts the tests which check whether SLEEF and ArmPL mappings are used,
in the order of the math functions' base names.
---
 .../AArch64/replace-with-veclib-armpl.ll      |  253 +--
 .../replace-with-veclib-sleef-scalable.ll     |   34 +-
 .../AArch64/replace-with-veclib-sleef.ll      |   34 +-
 .../AArch64/veclib-function-calls.ll          | 1545 ++++++++---------
 .../AArch64/veclib-intrinsic-calls.ll         |  821 +++++----
 5 files changed, 1307 insertions(+), 1380 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
index 4480a90a2728d3..758df0493cc504 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
 declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
 
 ;.
-; CHECK: @llvm.compiler.used = appending global [36 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vfmodq_f64, ptr @armpl_vfmodq_f32, ptr @armpl_svfmod_f64_x, ptr @armpl_svfmod_f32_x], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [36 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vfmodq_f64, ptr @armpl_vfmodq_f32, ptr @armpl_svfmod_f64_x, ptr @armpl_svfmod_f32_x], section "llvm.metadata"
 ;.
 define <2 x double> @llvm_cos_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_cos_f64
@@ -57,51 +57,6 @@ define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) #0 {
   ret <vscale x 4 x float> %1
 }
 
-declare <2 x double> @llvm.sin.v2f64(<2 x double>)
-declare <4 x float> @llvm.sin.v4f32(<4 x float>)
-declare <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double>)
-declare <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float>)
-
-define <2 x double> @llvm_sin_f64(<2 x double> %in) {
-; CHECK-LABEL: define <2 x double> @llvm_sin_f64
-; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vsinq_f64(<2 x double> [[IN]])
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in)
-  ret <2 x double> %1
-}
-
-define <4 x float> @llvm_sin_f32(<4 x float> %in) {
-; CHECK-LABEL: define <4 x float> @llvm_sin_f32
-; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vsinq_f32(<4 x float> [[IN]])
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in)
-  ret <4 x float> %1
-}
-
-define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @llvm_sin_vscale_f64
-; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
-  ret <vscale x 2 x double> %1
-}
-
-define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @llvm_sin_vscale_f32
-; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
-  ret <vscale x 4 x float> %1
-}
-
 declare <2 x double> @llvm.exp.v2f64(<2 x double>)
 declare <4 x float> @llvm.exp.v4f32(<4 x float>)
 declare <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double>)
@@ -147,93 +102,93 @@ define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) #0 {
   ret <vscale x 4 x float> %1
 }
 
-declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
-declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
-declare <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double>)
-declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)
+declare <2 x double> @llvm.exp10.v2f64(<2 x double>)
+declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
+declare <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
 
-define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
-; CHECK-LABEL: define <2 x double> @llvm_exp2_f64
+define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
+; CHECK-LABEL: define <2 x double> @llvm_exp10_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp2q_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
-  %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in)
+  %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
   ret <2 x double> %1
 }
 
-define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
-; CHECK-LABEL: define <4 x float> @llvm_exp2_f32
+define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
+; CHECK-LABEL: define <4 x float> @llvm_exp10_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp2q_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
-  %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
+  %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
   ret <4 x float> %1
 }
 
-define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp2_vscale_f64
+define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp10_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
-  %1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
+  %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
   ret <vscale x 2 x double> %1
 }
 
-define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp2_vscale_f32
+define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp10_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
-  %1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
+  %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
   ret <vscale x 4 x float> %1
 }
 
-declare <2 x double> @llvm.exp10.v2f64(<2 x double>)
-declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
-declare <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double>)
-declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
+declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
+declare <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)
 
-define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
-; CHECK-LABEL: define <2 x double> @llvm_exp10_f64
+define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
+; CHECK-LABEL: define <2 x double> @llvm_exp2_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp2q_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
-  %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
+  %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in)
   ret <2 x double> %1
 }
 
-define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
-; CHECK-LABEL: define <4 x float> @llvm_exp10_f32
+define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
+; CHECK-LABEL: define <4 x float> @llvm_exp2_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp2q_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
-  %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
+  %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
   ret <4 x float> %1
 }
 
-define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp10_vscale_f64
+define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp2_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
-  %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
+  %1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
   ret <vscale x 2 x double> %1
 }
 
-define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp10_vscale_f32
+define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp2_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
-  %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
+  %1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
   ret <vscale x 4 x float> %1
 }
 
@@ -282,93 +237,93 @@ define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) #0 {
   ret <vscale x 4 x float> %1
 }
 
-declare <2 x double> @llvm.log2.v2f64(<2 x double>)
-declare <4 x float> @llvm.log2.v4f32(<4 x float>)
-declare <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double>)
-declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>)
+declare <2 x double> @llvm.log10.v2f64(<2 x double>)
+declare <4 x float> @llvm.log10.v4f32(<4 x float>)
+declare <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>)
 
-define <2 x double> @llvm_log2_f64(<2 x double> %in) {
-; CHECK-LABEL: define <2 x double> @llvm_log2_f64
+define <2 x double> @llvm_log10_f64(<2 x double> %in) {
+; CHECK-LABEL: define <2 x double> @llvm_log10_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog2q_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog10q_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
-  %1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in)
+  %1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in)
   ret <2 x double> %1
 }
 
-define <4 x float> @llvm_log2_f32(<4 x float> %in) {
-; CHECK-LABEL: define <4 x float> @llvm_log2_f32
+define <4 x float> @llvm_log10_f32(<4 x float> %in) {
+; CHECK-LABEL: define <4 x float> @llvm_log10_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog2q_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog10q_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
-  %1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in)
+  %1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in)
   ret <4 x float> %1
 }
 
-define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @llvm_log2_vscale_f64
+define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @llvm_log10_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
-  %1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
+  %1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
   ret <vscale x 2 x double> %1
 }
 
-define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @llvm_log2_vscale_f32
+define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @llvm_log10_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
-  %1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
+  %1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
   ret <vscale x 4 x float> %1
 }
 
-declare <2 x double> @llvm.log10.v2f64(<2 x double>)
-declare <4 x float> @llvm.log10.v4f32(<4 x float>)
-declare <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double>)
-declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>)
+declare <2 x double> @llvm.log2.v2f64(<2 x double>)
+declare <4 x float> @llvm.log2.v4f32(<4 x float>)
+declare <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>)
 
-define <2 x double> @llvm_log10_f64(<2 x double> %in) {
-; CHECK-LABEL: define <2 x double> @llvm_log10_f64
+define <2 x double> @llvm_log2_f64(<2 x double> %in) {
+; CHECK-LABEL: define <2 x double> @llvm_log2_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog10q_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vlog2q_f64(<2 x double> [[IN]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
-  %1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in)
+  %1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in)
   ret <2 x double> %1
 }
 
-define <4 x float> @llvm_log10_f32(<4 x float> %in) {
-; CHECK-LABEL: define <4 x float> @llvm_log10_f32
+define <4 x float> @llvm_log2_f32(<4 x float> %in) {
+; CHECK-LABEL: define <4 x float> @llvm_log2_f32
 ; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog10q_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vlog2q_f32(<4 x float> [[IN]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
-  %1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in)
+  %1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in)
   ret <4 x float> %1
 }
 
-define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @llvm_log10_vscale_f64
+define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @llvm_log2_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
-  %1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
+  %1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
   ret <vscale x 2 x double> %1
 }
 
-define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @llvm_log10_vscale_f32
+define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @llvm_log2_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
-  %1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
+  %1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
   ret <vscale x 4 x float> %1
 }
 
@@ -424,6 +379,52 @@ define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vsca
   ret <vscale x 4 x float> %1
 }
 
+declare <2 x double> @llvm.sin.v2f64(<2 x double>)
+declare <4 x float> @llvm.sin.v4f32(<4 x float>)
+declare <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float>)
+
+define <2 x double> @llvm_sin_f64(<2 x double> %in) {
+; CHECK-LABEL: define <2 x double> @llvm_sin_f64
+; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @armpl_vsinq_f64(<2 x double> [[IN]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_sin_f32(<4 x float> %in) {
+; CHECK-LABEL: define <4 x float> @llvm_sin_f32
+; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @armpl_vsinq_f32(<4 x float> [[IN]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @llvm_sin_vscale_f64
+; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @llvm_sin_vscale_f32
+; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+
 define <2 x double> @frem_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @frem_f64
 ; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll
index 590dd9effac0ea..9e214655e413a0 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll
@@ -4,7 +4,7 @@
 target triple = "aarch64-unknown-linux-gnu"
 
 ;.
-; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxvv_fmod, ptr @_ZGVsMxvv_fmodf], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxvv_fmod, ptr @_ZGVsMxvv_fmodf], section "llvm.metadata"
 ;.
 define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_ceil_vscale_f64(
@@ -78,39 +78,39 @@ define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) {
   ret <vscale x 4 x float> %1
 }
 
-define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) {
-; CHECK-LABEL: @llvm_exp2_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_exp10_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
-  %1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
+  %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
   ret <vscale x 2 x double> %1
 }
 
-define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
-; CHECK-LABEL: @llvm_exp2_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_exp10_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
-  %1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
+  %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
   ret <vscale x 4 x float> %1
 }
 
-define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
-; CHECK-LABEL: @llvm_exp10_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_exp2_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
-  %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
+  %1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
   ret <vscale x 2 x double> %1
 }
 
-define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) {
-; CHECK-LABEL: @llvm_exp10_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_exp2_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
-  %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
+  %1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
   ret <vscale x 4 x float> %1
 }
 
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
index 865a46009b205f..f408df570fdc00 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
@@ -4,7 +4,7 @@
 target triple = "aarch64-unknown-linux-gnu"
 
 ;.
-; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2vv_fmod, ptr @_ZGVnN4vv_fmodf], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2vv_fmod, ptr @_ZGVnN4vv_fmodf], section "llvm.metadata"
 ;.
 define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_ceil_f64(
@@ -78,39 +78,39 @@ define <4 x float> @llvm_exp_f32(<4 x float> %in) {
   ret <4 x float> %1
 }
 
-define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
-; CHECK-LABEL: @llvm_exp2_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp2(<2 x double> [[IN:%.*]])
+define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_exp10_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
-  %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in)
+  %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
   ret <2 x double> %1
 }
 
-define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
-; CHECK-LABEL: @llvm_exp2_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[IN:%.*]])
+define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_exp10_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
-  %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
+  %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
   ret <4 x float> %1
 }
 
-define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
-; CHECK-LABEL: @llvm_exp10_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]])
+define <2 x double> @llvm_exp2_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_exp2_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp2(<2 x double> [[IN:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
-  %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
+  %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in)
   ret <2 x double> %1
 }
 
-define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
-; CHECK-LABEL: @llvm_exp10_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]])
+define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_exp2_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[IN:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
-  %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
+  %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
   ret <4 x float> %1
 }
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
index e30173a5888181..f9a07c50ebf36c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
@@ -376,6 +376,79 @@ define void @atan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
+declare double @atan2(double, double)
+declare float @atan2f(float, float)
+
+define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @atan2_f64
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; SLEEF-SVE-LABEL: define void @atan2_f64
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @atan2_f64
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vatan2q_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; ARMPL-SVE-LABEL: define void @atan2_f64
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
+  %in = load double, ptr %in.gep, align 8
+  %call = tail call double @atan2(double %in, double %in)
+  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
+  store double %call, ptr %out.gep, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @atan2_f32
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; SLEEF-SVE-LABEL: define void @atan2_f32
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @atan2_f32
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vatan2q_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; ARMPL-SVE-LABEL: define void @atan2_f32
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
+  %in = load float, ptr %in.gep, align 8
+  %call = tail call float @atan2f(float %in, float %in)
+  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
+  store float %call, ptr %out.gep, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
 declare double @atanh(double)
 declare float @atanhf(float)
 
@@ -522,6 +595,79 @@ define void @cbrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
+declare double @copysign(double, double)
+declare float @copysignf(float, float)
+
+define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @copysign_f64
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]])
+;
+; SLEEF-SVE-LABEL: define void @copysign_f64
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]])
+;
+; ARMPL-NEON-LABEL: define void @copysign_f64
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vcopysignq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; ARMPL-SVE-LABEL: define void @copysign_f64
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svcopysign_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
+  %in = load double, ptr %in.gep, align 8
+  %call = tail call double @copysign(double %in, double %in)
+  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
+  store double %call, ptr %out.gep, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @copysign_f32
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]])
+;
+; SLEEF-SVE-LABEL: define void @copysign_f32
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]])
+;
+; ARMPL-NEON-LABEL: define void @copysign_f32
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vcopysignq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; ARMPL-SVE-LABEL: define void @copysign_f32
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svcopysign_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
+  %in = load float, ptr %in.gep, align 8
+  %call = tail call float @copysignf(float %in, float %in)
+  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
+  store float %call, ptr %out.gep, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
 declare double @cos(double)
 declare float @cosf(float)
 
@@ -887,25 +1033,25 @@ define void @exp_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @exp2(double)
-declare float @exp2f(float)
+declare double @exp10(double)
+declare float @exp10f(float)
 
-define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @exp2_f64
+define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @exp10_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @exp2_f64
+; SLEEF-SVE-LABEL: define void @exp10_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @exp2_f64
+; ARMPL-NEON-LABEL: define void @exp10_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @exp2_f64
+; ARMPL-SVE-LABEL: define void @exp10_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -914,7 +1060,7 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @exp2(double %in)
+  %call = tail call double @exp10(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -925,22 +1071,22 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @exp2_f32
+define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @exp10_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @exp2_f32
+; SLEEF-SVE-LABEL: define void @exp10_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @exp2_f32
+; ARMPL-NEON-LABEL: define void @exp10_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @exp2_f32
+; ARMPL-SVE-LABEL: define void @exp10_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -949,7 +1095,7 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @exp2f(float %in)
+  %call = tail call float @exp10f(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -960,25 +1106,25 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @exp10(double)
-declare float @exp10f(float)
+declare double @exp2(double)
+declare float @exp2f(float)
 
-define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @exp10_f64
+define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @exp2_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @exp10_f64
+; SLEEF-SVE-LABEL: define void @exp2_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @exp10_f64
+; ARMPL-NEON-LABEL: define void @exp2_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @exp10_f64
+; ARMPL-SVE-LABEL: define void @exp2_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -987,7 +1133,7 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @exp10(double %in)
+  %call = tail call double @exp2(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -998,22 +1144,22 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @exp10_f32
+define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @exp2_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @exp10_f32
+; SLEEF-SVE-LABEL: define void @exp2_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @exp10_f32
+; ARMPL-NEON-LABEL: define void @exp2_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @exp10_f32
+; ARMPL-SVE-LABEL: define void @exp2_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1022,7 +1168,7 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @exp10f(float %in)
+  %call = tail call float @exp2f(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1106,25 +1252,25 @@ define void @expm1_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @lgamma(double)
-declare float @lgammaf(float)
+declare double @fdim(double, double)
+declare float @fdimf(float, float)
 
-define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @lgamma_f64
+define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fdim_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]])
 ;
-; SLEEF-SVE-LABEL: define void @lgamma_f64
+; SLEEF-SVE-LABEL: define void @fdim_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_lgamma(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]])
 ;
-; ARMPL-NEON-LABEL: define void @lgamma_f64
+; ARMPL-NEON-LABEL: define void @fdim_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlgammaq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vfdimq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @lgamma_f64
+; ARMPL-SVE-LABEL: define void @fdim_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlgamma_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svfdim_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1133,7 +1279,7 @@ define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @lgamma(double %in)
+  %call = tail call double @fdim(double %in, double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1144,22 +1290,22 @@ define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @lgamma_f32
+define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fdim_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]])
 ;
-; SLEEF-SVE-LABEL: define void @lgamma_f32
+; SLEEF-SVE-LABEL: define void @fdim_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_lgammaf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]])
 ;
-; ARMPL-NEON-LABEL: define void @lgamma_f32
+; ARMPL-NEON-LABEL: define void @fdim_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlgammaq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vfdimq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @lgamma_f32
+; ARMPL-SVE-LABEL: define void @fdim_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlgamma_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svfdim_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1168,7 +1314,7 @@ define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @lgammaf(float %in)
+  %call = tail call float @fdimf(float %in, float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1179,25 +1325,25 @@ define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @log(double)
-declare float @logf(float)
+declare double @fma(double, double, double)
+declare float @fmaf(float, float, float)
 
-define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log_f64
+define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fma_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]])
 ;
-; SLEEF-SVE-LABEL: define void @log_f64
+; SLEEF-SVE-LABEL: define void @fma_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]])
 ;
-; ARMPL-NEON-LABEL: define void @log_f64
+; ARMPL-NEON-LABEL: define void @fma_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vfmaq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @log_f64
+; ARMPL-SVE-LABEL: define void @fma_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svfma_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1206,7 +1352,7 @@ define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @log(double %in)
+  %call = tail call double @fma(double %in, double %in, double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1217,22 +1363,22 @@ define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log_f32
+define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fma_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]])
 ;
-; SLEEF-SVE-LABEL: define void @log_f32
+; SLEEF-SVE-LABEL: define void @fma_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]])
 ;
-; ARMPL-NEON-LABEL: define void @log_f32
+; ARMPL-NEON-LABEL: define void @fma_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vfmaq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @log_f32
+; ARMPL-SVE-LABEL: define void @fma_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svfma_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1241,7 +1387,7 @@ define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @logf(float %in)
+  %call = tail call float @fmaf(float %in, float %in, float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1252,25 +1398,25 @@ define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @log1p(double)
-declare float @log1pf(float)
+declare double @fmin(double, double)
+declare float @fminf(float, float)
 
-define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log1p_f64
+define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fmin_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]])
 ;
-; SLEEF-SVE-LABEL: define void @log1p_f64
+; SLEEF-SVE-LABEL: define void @fmin_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]])
 ;
-; ARMPL-NEON-LABEL: define void @log1p_f64
+; ARMPL-NEON-LABEL: define void @fmin_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog1pq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vfminq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @log1p_f64
+; ARMPL-SVE-LABEL: define void @fmin_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog1p_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svfmin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1279,7 +1425,7 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @log1p(double %in)
+  %call = tail call double @fmin(double %in, double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1290,22 +1436,22 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log1p_f32
+define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fmin_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]])
 ;
-; SLEEF-SVE-LABEL: define void @log1p_f32
+; SLEEF-SVE-LABEL: define void @fmin_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]])
 ;
-; ARMPL-NEON-LABEL: define void @log1p_f32
+; ARMPL-NEON-LABEL: define void @fmin_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog1pq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vfminq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @log1p_f32
+; ARMPL-SVE-LABEL: define void @fmin_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog1p_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svfmin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1314,7 +1460,7 @@ define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @log1pf(float %in)
+  %call = tail call float @fminf(float %in, float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1325,25 +1471,25 @@ define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @log2(double)
-declare float @log2f(float)
+declare double @fmod(double, double)
+declare float @fmodf(float, float)
 
-define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log2_f64
+define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fmod_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_fmod(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 ;
-; SLEEF-SVE-LABEL: define void @log2_f64
+; SLEEF-SVE-LABEL: define void @fmod_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @log2_f64
+; ARMPL-NEON-LABEL: define void @fmod_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vfmodq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @log2_f64
+; ARMPL-SVE-LABEL: define void @fmod_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svfmod_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1352,7 +1498,7 @@ define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @log2(double %in)
+  %call = tail call double @fmod(double %in, double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1363,22 +1509,22 @@ define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log2_f32
+define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fmod_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_fmodf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 ;
-; SLEEF-SVE-LABEL: define void @log2_f32
+; SLEEF-SVE-LABEL: define void @fmod_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @log2_f32
+; ARMPL-NEON-LABEL: define void @fmod_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vfmodq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @log2_f32
+; ARMPL-SVE-LABEL: define void @fmod_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svfmod_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1387,7 +1533,7 @@ define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @log2f(float %in)
+  %call = tail call float @fmodf(float %in, float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1398,25 +1544,25 @@ define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @log10(double)
-declare float @log10f(float)
+declare double @hypot(double, double)
+declare float @hypotf(float, float)
 
-define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log10_f64
+define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @hypot_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]])
 ;
-; SLEEF-SVE-LABEL: define void @log10_f64
+; SLEEF-SVE-LABEL: define void @hypot_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]])
 ;
-; ARMPL-NEON-LABEL: define void @log10_f64
+; ARMPL-NEON-LABEL: define void @hypot_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vhypotq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @log10_f64
+; ARMPL-SVE-LABEL: define void @hypot_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svhypot_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1425,7 +1571,7 @@ define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @log10(double %in)
+  %call = tail call double @hypot(double %in, double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1436,22 +1582,22 @@ define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log10_f32
+define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @hypot_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]])
 ;
-; SLEEF-SVE-LABEL: define void @log10_f32
+; SLEEF-SVE-LABEL: define void @hypot_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]])
 ;
-; ARMPL-NEON-LABEL: define void @log10_f32
+; ARMPL-NEON-LABEL: define void @hypot_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vhypotq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @log10_f32
+; ARMPL-SVE-LABEL: define void @hypot_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svhypot_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1460,7 +1606,7 @@ define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @log10f(float %in)
+  %call = tail call float @hypotf(float %in, float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1471,25 +1617,25 @@ define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @sin(double)
-declare float @sinf(float)
+declare double @lgamma(double)
+declare float @lgammaf(float)
 
-define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @sin_f64
+define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @lgamma_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @sin_f64
+; SLEEF-SVE-LABEL: define void @lgamma_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_lgamma(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @sin_f64
+; ARMPL-NEON-LABEL: define void @lgamma_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlgammaq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @sin_f64
+; ARMPL-SVE-LABEL: define void @lgamma_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlgamma_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1498,7 +1644,7 @@ define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @sin(double %in)
+  %call = tail call double @lgamma(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1509,22 +1655,22 @@ define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @sin_f32
+define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @lgamma_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @sin_f32
+; SLEEF-SVE-LABEL: define void @lgamma_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_lgammaf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @sin_f32
+; ARMPL-NEON-LABEL: define void @lgamma_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlgammaq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @sin_f32
+; ARMPL-SVE-LABEL: define void @lgamma_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlgamma_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1533,7 +1679,7 @@ define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @sinf(float %in)
+  %call = tail call float @lgammaf(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1544,25 +1690,25 @@ define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @sinh(double)
-declare float @sinhf(float)
+declare double @log(double)
+declare float @logf(float)
 
-define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @sinh_f64
+define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @sinh_f64
+; SLEEF-SVE-LABEL: define void @log_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @sinh_f64
+; ARMPL-NEON-LABEL: define void @log_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinhq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @sinh_f64
+; ARMPL-SVE-LABEL: define void @log_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsinh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1571,7 +1717,7 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @sinh(double %in)
+  %call = tail call double @log(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1582,22 +1728,22 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @sinh_f32
+define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @sinh_f32
+; SLEEF-SVE-LABEL: define void @log_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @sinh_f32
+; ARMPL-NEON-LABEL: define void @log_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinhq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @sinh_f32
+; ARMPL-SVE-LABEL: define void @log_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsinh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1606,7 +1752,7 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @sinhf(float %in)
+  %call = tail call float @logf(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1617,25 +1763,25 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @sinpi(double)
-declare float @sinpif(float)
+declare double @log10(double)
+declare float @log10f(float)
 
-define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @sinpi_f64
+define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log10_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @sinpi_f64
+; SLEEF-SVE-LABEL: define void @log10_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @sinpi_f64
+; ARMPL-NEON-LABEL: define void @log10_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinpiq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @sinpi_f64
+; ARMPL-SVE-LABEL: define void @log10_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsinpi_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1644,7 +1790,7 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @sinpi(double %in)
+  %call = tail call double @log10(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1655,22 +1801,22 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @sinpi_f32
+define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log10_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @sinpi_f32
+; SLEEF-SVE-LABEL: define void @log10_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @sinpi_f32
+; ARMPL-NEON-LABEL: define void @log10_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinpiq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @sinpi_f32
+; ARMPL-SVE-LABEL: define void @log10_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsinpi_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1679,7 +1825,7 @@ define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @sinpif(float %in)
+  %call = tail call float @log10f(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1690,25 +1836,25 @@ define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @sqrt(double)
-declare float @sqrtf(float)
+declare double @log1p(double)
+declare float @log1pf(float)
 
-define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @sqrt_f64
+define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log1p_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @sqrt_f64
+; SLEEF-SVE-LABEL: define void @log1p_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sqrt(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @sqrt_f64
+; ARMPL-NEON-LABEL: define void @log1p_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsqrtq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog1pq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @sqrt_f64
+; ARMPL-SVE-LABEL: define void @log1p_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsqrt_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog1p_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1717,7 +1863,7 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @sqrt(double %in)
+  %call = tail call double @log1p(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1728,22 +1874,22 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @sqrt_f32
+define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log1p_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @sqrt_f32
+; SLEEF-SVE-LABEL: define void @log1p_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sqrtf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @sqrt_f32
+; ARMPL-NEON-LABEL: define void @log1p_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsqrtq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog1pq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @sqrt_f32
+; ARMPL-SVE-LABEL: define void @log1p_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsqrt_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog1p_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1752,7 +1898,7 @@ define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @sqrtf(float %in)
+  %call = tail call float @log1pf(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1763,25 +1909,25 @@ define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @tan(double)
-declare float @tanf(float)
+declare double @log2(double)
+declare float @log2f(float)
 
-define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @tan_f64
+define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log2_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @tan_f64
+; SLEEF-SVE-LABEL: define void @log2_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @tan_f64
+; ARMPL-NEON-LABEL: define void @log2_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vtanq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @tan_f64
+; ARMPL-SVE-LABEL: define void @log2_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1790,7 +1936,7 @@ define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @tan(double %in)
+  %call = tail call double @log2(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1801,22 +1947,22 @@ define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @tan_f32
+define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log2_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @tan_f32
+; SLEEF-SVE-LABEL: define void @log2_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @tan_f32
+; ARMPL-NEON-LABEL: define void @log2_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vtanq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @tan_f32
+; ARMPL-SVE-LABEL: define void @log2_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1825,7 +1971,7 @@ define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @tanf(float %in)
+  %call = tail call float @log2f(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1836,98 +1982,100 @@ define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @tanh(double)
-declare float @tanhf(float)
+declare double @modf(double, ptr)
+declare float @modff(float, ptr)
 
-define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @tanh_f64
-; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]])
+define void @test_modf(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; SLEEF-NEON-LABEL: define void @test_modf
+; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @tanh_f64
-; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE-LABEL: define void @test_modf
+; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    [[TMP23:%.*]] = call <vscale x 2 x double> @_ZGVsMxvl8_modf(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @tanh_f64
-; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vtanhq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON-LABEL: define void @test_modf
+; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    [[TMP5:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @tanh_f64
-; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svtanh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE-LABEL: define void @test_modf
+; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    [[TMP23:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-  entry:
+entry:
   br label %for.body
 
-  for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
-  %in = load double, ptr %in.gep, align 8
-  %call = tail call double @tanh(double %in)
-  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
-  store double %call, ptr %out.gep, align 8
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
+  %num = load double, ptr %gepa, align 8
+  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
+  %data = call double @modf(double %num, ptr %gepb)
+  %gepc = getelementptr inbounds double, ptr %c, i64 %indvars.iv
+  store double %data, ptr %gepc, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
 
-  for.end:
+for.cond.cleanup:
   ret void
 }
 
-define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @tanh_f32
-; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]])
+define void @test_modff(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; SLEEF-NEON-LABEL: define void @test_modff
+; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @tanh_f32
-; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE-LABEL: define void @test_modff
+; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    [[TMP23:%.*]] = call <vscale x 4 x float> @_ZGVsMxvl4_modff(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @tanh_f32
-; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vtanhq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON-LABEL: define void @test_modff
+; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    [[TMP5:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @tanh_f32
-; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svtanh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE-LABEL: define void @test_modff
+; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    [[TMP23:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-  entry:
+entry:
   br label %for.body
 
-  for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
-  %in = load float, ptr %in.gep, align 8
-  %call = tail call float @tanhf(float %in)
-  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
-  store float %call, ptr %out.gep, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-  for.end:
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr float, ptr %a, i64 %indvars.iv
+  %num = load float, ptr %gepa, align 8
+  %gepb = getelementptr float, ptr %b, i64 %indvars.iv
+  %data = call float @modff(float %num, ptr %gepb)
+  %gepc = getelementptr inbounds float, ptr %c, i64 %indvars.iv
+  store float %data, ptr %gepc, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
   ret void
 }
 
-declare double @tgamma(double)
-declare float @tgammaf(float)
+declare double @nextafter(double, double)
+declare float @nextafterf(float, float)
 
-define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @tgamma_f64
+define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @nextafter_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]])
 ;
-; SLEEF-SVE-LABEL: define void @tgamma_f64
+; SLEEF-SVE-LABEL: define void @nextafter_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tgamma(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]])
 ;
-; ARMPL-NEON-LABEL: define void @tgamma_f64
+; ARMPL-NEON-LABEL: define void @nextafter_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vtgammaq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vnextafterq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @tgamma_f64
+; ARMPL-SVE-LABEL: define void @nextafter_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svtgamma_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svnextafter_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1936,7 +2084,7 @@ define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @tgamma(double %in)
+  %call = tail call double @nextafter(double %in, double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1947,22 +2095,22 @@ define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @tgamma_f32
+define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @nextafter_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]])
 ;
-; SLEEF-SVE-LABEL: define void @tgamma_f32
+; SLEEF-SVE-LABEL: define void @nextafter_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tgammaf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]])
 ;
-; ARMPL-NEON-LABEL: define void @tgamma_f32
+; ARMPL-NEON-LABEL: define void @nextafter_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vtgammaq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vnextafterq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @tgamma_f32
+; ARMPL-SVE-LABEL: define void @nextafter_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svtgamma_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svnextafter_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1971,7 +2119,7 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @tgammaf(float %in)
+  %call = tail call float @nextafterf(float %in, float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1982,25 +2130,25 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @atan2(double, double)
-declare float @atan2f(float, float)
+declare double @pow(double, double)
+declare float @powf(float, float)
 
-define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @atan2_f64
+define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @pow_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 ;
-; SLEEF-SVE-LABEL: define void @atan2_f64
+; SLEEF-SVE-LABEL: define void @pow_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @atan2_f64
+; ARMPL-NEON-LABEL: define void @pow_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vatan2q_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @atan2_f64
+; ARMPL-SVE-LABEL: define void @pow_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2009,7 +2157,7 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @atan2(double %in, double %in)
+  %call = tail call double @pow(double %in, double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2020,22 +2168,22 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @atan2_f32
+define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @pow_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 ;
-; SLEEF-SVE-LABEL: define void @atan2_f32
+; SLEEF-SVE-LABEL: define void @pow_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @atan2_f32
+; ARMPL-NEON-LABEL: define void @pow_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vatan2q_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @atan2_f32
+; ARMPL-SVE-LABEL: define void @pow_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2044,7 +2192,7 @@ define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @atan2f(float %in, float %in)
+  %call = tail call float @powf(float %in, float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2055,25 +2203,25 @@ define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @copysign(double, double)
-declare float @copysignf(float, float)
+declare double @sin(double)
+declare float @sinf(float)
 
-define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @copysign_f64
+define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @sin_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @copysign_f64
+; SLEEF-SVE-LABEL: define void @sin_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @copysign_f64
+; ARMPL-NEON-LABEL: define void @sin_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vcopysignq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @copysign_f64
+; ARMPL-SVE-LABEL: define void @sin_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svcopysign_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2082,7 +2230,7 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @copysign(double %in, double %in)
+  %call = tail call double @sin(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2093,22 +2241,22 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @copysign_f32
+define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @sin_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @copysign_f32
+; SLEEF-SVE-LABEL: define void @sin_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @copysign_f32
+; ARMPL-NEON-LABEL: define void @sin_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vcopysignq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @copysign_f32
+; ARMPL-SVE-LABEL: define void @sin_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svcopysign_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2117,7 +2265,7 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @copysignf(float %in, float %in)
+  %call = tail call float @sinf(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2128,25 +2276,98 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @fdim(double, double)
-declare float @fdimf(float, float)
+declare void @sincos(double, ptr, ptr)
+declare void @sincosf(float, ptr, ptr)
 
-define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fdim_f64
+define void @test_sincos(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; SLEEF-NEON-LABEL: define void @test_sincos
+; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @test_sincos
+; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    call void @_ZGVsMxvl8l8_sincos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @test_sincos
+; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @test_sincos
+; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
+  %num = load double, ptr %gepa, align 8
+  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
+  %gepc = getelementptr double, ptr %c, i64 %indvars.iv
+  call void @sincos(double %num, ptr %gepb, ptr %gepc)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_sincosf(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; SLEEF-NEON-LABEL: define void @test_sincosf
+; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @test_sincosf
+; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    call void @_ZGVsMxvl4l4_sincosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @test_sincosf
+; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @test_sincosf
+; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    call void @armpl_svsincos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr float, ptr %a, i64 %indvars.iv
+  %num = load float, ptr %gepa, align 8
+  %gepb = getelementptr float, ptr %b, i64 %indvars.iv
+  %gepc = getelementptr float, ptr %c, i64 %indvars.iv
+  call void @sincosf(float %num, ptr %gepb, ptr %gepc)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+declare double @sinh(double)
+declare float @sinhf(float)
+
+define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @sinh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @fdim_f64
+; SLEEF-SVE-LABEL: define void @sinh_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @fdim_f64
+; ARMPL-NEON-LABEL: define void @sinh_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vfdimq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinhq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @fdim_f64
+; ARMPL-SVE-LABEL: define void @sinh_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svfdim_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsinh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2155,7 +2376,7 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @fdim(double %in, double %in)
+  %call = tail call double @sinh(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2166,95 +2387,22 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fdim_f32
+define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @sinh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @fdim_f32
+; SLEEF-SVE-LABEL: define void @sinh_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @fdim_f32
+; ARMPL-NEON-LABEL: define void @sinh_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vfdimq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
-;
-; ARMPL-SVE-LABEL: define void @fdim_f32
-; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svfdim_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-  entry:
-  br label %for.body
-
-  for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
-  %in = load float, ptr %in.gep, align 8
-  %call = tail call float @fdimf(float %in, float %in)
-  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
-  store float %call, ptr %out.gep, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-  for.end:
-  ret void
-}
-
-declare double @fmin(double, double)
-declare float @fminf(float, float)
-
-define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fmin_f64
-; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]])
-;
-; SLEEF-SVE-LABEL: define void @fmin_f64
-; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]])
-;
-; ARMPL-NEON-LABEL: define void @fmin_f64
-; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vfminq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
-;
-; ARMPL-SVE-LABEL: define void @fmin_f64
-; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svfmin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-  entry:
-  br label %for.body
-
-  for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
-  %in = load double, ptr %in.gep, align 8
-  %call = tail call double @fmin(double %in, double %in)
-  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
-  store double %call, ptr %out.gep, align 8
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-  for.end:
-  ret void
-}
-
-define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fmin_f32
-; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]])
-;
-; SLEEF-SVE-LABEL: define void @fmin_f32
-; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]])
-;
-; ARMPL-NEON-LABEL: define void @fmin_f32
-; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vfminq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinhq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @fmin_f32
+; ARMPL-SVE-LABEL: define void @sinh_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svfmin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsinh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2263,7 +2411,7 @@ define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @fminf(float %in, float %in)
+  %call = tail call float @sinhf(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2274,25 +2422,25 @@ define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @fmod(double, double)
-declare float @fmodf(float, float)
+declare double @sinpi(double)
+declare float @sinpif(float)
 
-define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fmod_f64
+define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @sinpi_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_fmod(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @fmod_f64
+; SLEEF-SVE-LABEL: define void @sinpi_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @fmod_f64
+; ARMPL-NEON-LABEL: define void @sinpi_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vfmodq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinpiq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @fmod_f64
+; ARMPL-SVE-LABEL: define void @sinpi_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svfmod_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsinpi_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2301,7 +2449,7 @@ define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @fmod(double %in, double %in)
+  %call = tail call double @sinpi(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2312,22 +2460,22 @@ define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fmod_f32
+define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @sinpi_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_fmodf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @fmod_f32
+; SLEEF-SVE-LABEL: define void @sinpi_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @fmod_f32
+; ARMPL-NEON-LABEL: define void @sinpi_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vfmodq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinpiq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @fmod_f32
+; ARMPL-SVE-LABEL: define void @sinpi_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svfmod_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsinpi_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2336,7 +2484,7 @@ define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @fmodf(float %in, float %in)
+  %call = tail call float @sinpif(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2347,25 +2495,25 @@ define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @hypot(double, double)
-declare float @hypotf(float, float)
+declare double @sqrt(double)
+declare float @sqrtf(float)
 
-define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @hypot_f64
+define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @sqrt_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @hypot_f64
+; SLEEF-SVE-LABEL: define void @sqrt_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sqrt(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @hypot_f64
+; ARMPL-NEON-LABEL: define void @sqrt_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vhypotq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsqrtq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @hypot_f64
+; ARMPL-SVE-LABEL: define void @sqrt_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svhypot_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsqrt_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2374,7 +2522,7 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @hypot(double %in, double %in)
+  %call = tail call double @sqrt(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2385,22 +2533,22 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @hypot_f32
+define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @sqrt_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @hypot_f32
+; SLEEF-SVE-LABEL: define void @sqrt_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sqrtf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @hypot_f32
+; ARMPL-NEON-LABEL: define void @sqrt_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vhypotq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsqrtq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @hypot_f32
+; ARMPL-SVE-LABEL: define void @sqrt_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svhypot_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsqrt_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2409,7 +2557,7 @@ define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @hypotf(float %in, float %in)
+  %call = tail call float @sqrtf(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2420,25 +2568,25 @@ define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @nextafter(double, double)
-declare float @nextafterf(float, float)
+declare double @tan(double)
+declare float @tanf(float)
 
-define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @nextafter_f64
+define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @tan_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @nextafter_f64
+; SLEEF-SVE-LABEL: define void @tan_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @nextafter_f64
+; ARMPL-NEON-LABEL: define void @tan_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vnextafterq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vtanq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @nextafter_f64
+; ARMPL-SVE-LABEL: define void @tan_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svnextafter_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2447,7 +2595,7 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @nextafter(double %in, double %in)
+  %call = tail call double @tan(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2458,22 +2606,22 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @nextafter_f32
+define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @tan_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @nextafter_f32
+; SLEEF-SVE-LABEL: define void @tan_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @nextafter_f32
+; ARMPL-NEON-LABEL: define void @tan_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vnextafterq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vtanq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @nextafter_f32
+; ARMPL-SVE-LABEL: define void @tan_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svnextafter_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2482,7 +2630,7 @@ define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @nextafterf(float %in, float %in)
+  %call = tail call float @tanf(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2493,25 +2641,25 @@ define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @pow(double, double)
-declare float @powf(float, float)
+declare double @tanh(double)
+declare float @tanhf(float)
 
-define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @pow_f64
+define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @tanh_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @pow_f64
+; SLEEF-SVE-LABEL: define void @tanh_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @pow_f64
+; ARMPL-NEON-LABEL: define void @tanh_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vtanhq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @pow_f64
+; ARMPL-SVE-LABEL: define void @tanh_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svtanh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2520,7 +2668,7 @@ define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @pow(double %in, double %in)
+  %call = tail call double @tanh(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2531,22 +2679,22 @@ define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @pow_f32
+define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @tanh_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @pow_f32
+; SLEEF-SVE-LABEL: define void @tanh_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @pow_f32
+; ARMPL-NEON-LABEL: define void @tanh_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vtanhq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @pow_f32
+; ARMPL-SVE-LABEL: define void @tanh_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svtanh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2555,7 +2703,7 @@ define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @powf(float %in, float %in)
+  %call = tail call float @tanhf(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2566,25 +2714,25 @@ define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-declare double @fma(double, double, double)
-declare float @fmaf(float, float, float)
+declare double @tgamma(double)
+declare float @tgammaf(float)
 
-define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fma_f64
+define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @tgamma_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @fma_f64
+; SLEEF-SVE-LABEL: define void @tgamma_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tgamma(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @fma_f64
+; ARMPL-NEON-LABEL: define void @tgamma_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vfmaq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vtgammaq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @fma_f64
+; ARMPL-SVE-LABEL: define void @tgamma_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svfma_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svtgamma_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2593,7 +2741,7 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @fma(double %in, double %in, double %in)
+  %call = tail call double @tgamma(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2604,22 +2752,22 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   ret void
 }
 
-define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fma_f32
+define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @tgamma_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @fma_f32
+; SLEEF-SVE-LABEL: define void @tgamma_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tgammaf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @fma_f32
+; ARMPL-NEON-LABEL: define void @tgamma_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vfmaq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vtgammaq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @fma_f32
+; ARMPL-SVE-LABEL: define void @tgamma_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svfma_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svtgamma_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -2628,7 +2776,7 @@ define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @fmaf(float %in, float %in, float %in)
+  %call = tail call float @tgammaf(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -2638,224 +2786,3 @@ define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
   for.end:
   ret void
 }
-
-declare double @modf(double, ptr)
-declare float @modff(float, ptr)
-
-define void @test_modf(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
-; SLEEF-NEON-LABEL: define void @test_modf
-; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
-;
-; SLEEF-SVE-LABEL: define void @test_modf
-; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP23:%.*]] = call <vscale x 2 x double> @_ZGVsMxvl8_modf(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-; ARMPL-NEON-LABEL: define void @test_modf
-; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP5:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
-;
-; ARMPL-SVE-LABEL: define void @test_modf
-; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP23:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
-  %num = load double, ptr %gepa, align 8
-  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
-  %data = call double @modf(double %num, ptr %gepb)
-  %gepc = getelementptr inbounds double, ptr %c, i64 %indvars.iv
-  store double %data, ptr %gepc, align 8
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body
-
-for.cond.cleanup:
-  ret void
-}
-
-define void @test_modff(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
-; SLEEF-NEON-LABEL: define void @test_modff
-; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
-;
-; SLEEF-SVE-LABEL: define void @test_modff
-; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    [[TMP23:%.*]] = call <vscale x 4 x float> @_ZGVsMxvl4_modff(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-; ARMPL-NEON-LABEL: define void @test_modff
-; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    [[TMP5:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]])
-;
-; ARMPL-SVE-LABEL: define void @test_modff
-; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    [[TMP23:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %gepa = getelementptr float, ptr %a, i64 %indvars.iv
-  %num = load float, ptr %gepa, align 8
-  %gepb = getelementptr float, ptr %b, i64 %indvars.iv
-  %data = call float @modff(float %num, ptr %gepb)
-  %gepc = getelementptr inbounds float, ptr %c, i64 %indvars.iv
-  store float %data, ptr %gepc, align 8
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body
-
-for.cond.cleanup:
-  ret void
-}
-
-declare void @sincos(double, ptr, ptr)
-declare void @sincosf(float, ptr, ptr)
-
-define void @test_sincos(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
-; SLEEF-NEON-LABEL: define void @test_sincos
-; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
-;
-; SLEEF-SVE-LABEL: define void @test_sincos
-; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    call void @_ZGVsMxvl8l8_sincos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-; ARMPL-NEON-LABEL: define void @test_sincos
-; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
-;
-; ARMPL-SVE-LABEL: define void @test_sincos
-; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
-  %num = load double, ptr %gepa, align 8
-  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
-  %gepc = getelementptr double, ptr %c, i64 %indvars.iv
-  call void @sincos(double %num, ptr %gepb, ptr %gepc)
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body
-
-for.cond.cleanup:
-  ret void
-}
-
-define void @test_sincosf(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
-; SLEEF-NEON-LABEL: define void @test_sincosf
-; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
-;
-; SLEEF-SVE-LABEL: define void @test_sincosf
-; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    call void @_ZGVsMxvl4l4_sincosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-; ARMPL-NEON-LABEL: define void @test_sincosf
-; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
-;
-; ARMPL-SVE-LABEL: define void @test_sincosf
-; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    call void @armpl_svsincos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %gepa = getelementptr float, ptr %a, i64 %indvars.iv
-  %num = load float, ptr %gepa, align 8
-  %gepb = getelementptr float, ptr %b, i64 %indvars.iv
-  %gepc = getelementptr float, ptr %c, i64 %indvars.iv
-  call void @sincosf(float %num, ptr %gepb, ptr %gepc)
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body
-
-for.cond.cleanup:
-  ret void
-}
-
-declare void @sincospi(double, ptr, ptr)
-declare void @sincospif(float, ptr, ptr)
-
-define void @test_sincospi(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
-; SLEEF-NEON-LABEL: define void @test_sincospi
-; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    call void @_ZGVnN2vl8l8_sincospi(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
-;
-; SLEEF-SVE-LABEL: define void @test_sincospi
-; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    call void @_ZGVsMxvl8l8_sincospi(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-; ARMPL-NEON-LABEL: define void @test_sincospi
-; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    call void @armpl_vsincospiq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
-;
-; ARMPL-SVE-LABEL: define void @test_sincospi
-; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    call void @armpl_svsincospi_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
-  %num = load double, ptr %gepa, align 8
-  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
-  %gepc = getelementptr double, ptr %c, i64 %indvars.iv
-  call void @sincospi(double %num, ptr %gepb, ptr %gepc)
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body
-
-for.cond.cleanup:
-  ret void
-}
-
-define void @test_sincospif(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
-; SLEEF-NEON-LABEL: define void @test_sincospif
-; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON:    call void @_ZGVnN4vl4l4_sincospif(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
-;
-; SLEEF-SVE-LABEL: define void @test_sincospif
-; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE:    call void @_ZGVsMxvl4l4_sincospif(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-; ARMPL-NEON-LABEL: define void @test_sincospif
-; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-NEON:    call void @armpl_vsincospiq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
-;
-; ARMPL-SVE-LABEL: define void @test_sincospif
-; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
-; ARMPL-SVE:    call void @armpl_svsincospi_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-entry:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %gepa = getelementptr float, ptr %a, i64 %indvars.iv
-  %num = load float, ptr %gepa, align 8
-  %gepb = getelementptr float, ptr %b, i64 %indvars.iv
-  %gepc = getelementptr float, ptr %c, i64 %indvars.iv
-  call void @sincospif(float %num, ptr %gepb, ptr %gepc)
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body
-
-for.cond.cleanup:
-  ret void
-}
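
As a point of reference for reading the hunks above and below: every test in these two files has the same shape, a scalar loop around one libm or intrinsic call, run through the loop vectorizer once per library/extension combination. The RUN lines themselves are outside the hunks shown in this patch, so the sketch below is an approximation of those driver lines rather than a copy; treat the exact flags and pass pipeline as assumptions.

; RUN: opt < %s -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=SLEEF-NEON
; RUN: opt < %s -mattr=+sve  -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=SLEEF-SVE
; RUN: opt < %s -mattr=+neon -vector-library=ArmPL       -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=ARMPL-NEON
; RUN: opt < %s -mattr=+sve  -vector-library=ArmPL       -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=ARMPL-SVE

Under runs of that shape, a scalar `tail call double @llvm.cos.f64(double %in)` in the loop body is expected to be widened to @_ZGVnN2v_cos (SLEEF, NEON), @_ZGVsMxv_cos (SLEEF, SVE), @armpl_vcosq_f64 (ArmPL, NEON) or @armpl_svcos_f64_x (ArmPL, SVE), which is exactly what the SLEEF-*/ARMPL-* CHECK lines in the reordered tests assert; intrinsics without a veclib mapping (ceil, copysign, fabs, floor, fma) simply stay as the corresponding vector intrinsic.
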
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
index d59c28849bfb5f..2a552077a42b61 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
@@ -12,171 +12,25 @@ target triple = "aarch64-unknown-linux-gnu"
 ; are checking fixed width vectorization with NEON and scalable vectorization
 ; with SVE.
 
-declare double @llvm.cos.f64(double)
-declare float @llvm.cos.f32(float)
+declare double @llvm.ceil.f64(double)
+declare float @llvm.ceil.f32(float)
 
-define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @cos_f64
+define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @ceil_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @cos_f64
+; SLEEF-SVE-LABEL: define void @ceil_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @cos_f64
+; ARMPL-NEON-LABEL: define void @ceil_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @cos_f64
+; ARMPL-SVE-LABEL: define void @ceil_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-  entry:
-  br label %for.body
-
-  for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
-  %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.cos.f64(double %in)
-  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
-  store double %call, ptr %out.gep, align 8
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-  for.end:
-  ret void
-}
-
-define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @cos_f32
-; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]])
-;
-; SLEEF-SVE-LABEL: define void @cos_f32
-; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-; ARMPL-NEON-LABEL: define void @cos_f32
-; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[WIDE_LOAD:%.*]])
-;
-; ARMPL-SVE-LABEL: define void @cos_f32
-; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-  entry:
-  br label %for.body
-
-  for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
-  %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.cos.f32(float %in)
-  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
-  store float %call, ptr %out.gep, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-  for.end:
-  ret void
-}
-
-declare double @llvm.exp.f64(double)
-declare float @llvm.exp.f32(float)
-
-define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @exp_f64
-; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
-;
-; SLEEF-SVE-LABEL: define void @exp_f64
-; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-; ARMPL-NEON-LABEL: define void @exp_f64
-; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[WIDE_LOAD:%.*]])
-;
-; ARMPL-SVE-LABEL: define void @exp_f64
-; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-  entry:
-  br label %for.body
-
-  for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
-  %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.exp.f64(double %in)
-  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
-  store double %call, ptr %out.gep, align 8
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-  for.end:
-  ret void
-}
-
-define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @exp_f32
-; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]])
-;
-; SLEEF-SVE-LABEL: define void @exp_f32
-; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-; ARMPL-NEON-LABEL: define void @exp_f32
-; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[WIDE_LOAD:%.*]])
-;
-; ARMPL-SVE-LABEL: define void @exp_f32
-; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-  entry:
-  br label %for.body
-
-  for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
-  %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.exp.f32(float %in)
-  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
-  store float %call, ptr %out.gep, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-  for.end:
-  ret void
-}
-
-declare double @llvm.exp2.f64(double)
-declare float @llvm.exp2.f32(float)
-
-define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @exp2_f64
-; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
-;
-; SLEEF-SVE-LABEL: define void @exp2_f64
-; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
-;
-; ARMPL-NEON-LABEL: define void @exp2_f64
-; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
-;
-; ARMPL-SVE-LABEL: define void @exp2_f64
-; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
 ;
   entry:
   br label %for.body
@@ -185,7 +39,7 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.exp2.f64(double %in)
+  %call = tail call double @llvm.ceil.f64(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -196,22 +50,22 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @exp2_f32
+define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @ceil_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @exp2_f32
+; SLEEF-SVE-LABEL: define void @ceil_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @exp2_f32
+; ARMPL-NEON-LABEL: define void @ceil_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @exp2_f32
+; ARMPL-SVE-LABEL: define void @ceil_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
 ;
   entry:
   br label %for.body
@@ -220,7 +74,7 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.exp2.f32(float %in)
+  %call = tail call float @llvm.ceil.f32(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -231,25 +85,25 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.exp10.f64(double)
-declare float @llvm.exp10.f32(float)
+declare double @llvm.copysign.f64(double, double)
+declare float @llvm.copysign.f32(float, float)
 
-define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @exp10_f64
+define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @copysign_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 ;
-; SLEEF-SVE-LABEL: define void @exp10_f64
+; SLEEF-SVE-LABEL: define void @copysign_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 ;
-; ARMPL-NEON-LABEL: define void @exp10_f64
+; ARMPL-NEON-LABEL: define void @copysign_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @exp10_f64
+; ARMPL-SVE-LABEL: define void @copysign_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 ;
   entry:
   br label %for.body
@@ -258,7 +112,7 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.exp10.f64(double %in)
+  %call = tail call double @llvm.copysign.f64(double %in, double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -269,22 +123,22 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @exp10_f32
+define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @copysign_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 ;
-; SLEEF-SVE-LABEL: define void @exp10_f32
+; SLEEF-SVE-LABEL: define void @copysign_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
 ;
-; ARMPL-NEON-LABEL: define void @exp10_f32
+; ARMPL-NEON-LABEL: define void @copysign_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @exp10_f32
+; ARMPL-SVE-LABEL: define void @copysign_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
 ;
   entry:
   br label %for.body
@@ -293,7 +147,7 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.exp10.f32(float %in)
+  %call = tail call float @llvm.copysign.f32(float %in, float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -304,25 +158,25 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.log.f64(double)
-declare float @llvm.log.f32(float)
+declare double @llvm.cos.f64(double)
+declare float @llvm.cos.f32(float)
 
-define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log_f64
+define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @cos_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @log_f64
+; SLEEF-SVE-LABEL: define void @cos_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @log_f64
+; ARMPL-NEON-LABEL: define void @cos_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @log_f64
+; ARMPL-SVE-LABEL: define void @cos_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -331,7 +185,7 @@ define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.log.f64(double %in)
+  %call = tail call double @llvm.cos.f64(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -342,22 +196,22 @@ define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log_f32
+define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @cos_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @log_f32
+; SLEEF-SVE-LABEL: define void @cos_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @log_f32
+; ARMPL-NEON-LABEL: define void @cos_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @log_f32
+; ARMPL-SVE-LABEL: define void @cos_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -366,7 +220,7 @@ define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.log.f32(float %in)
+  %call = tail call float @llvm.cos.f32(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -377,25 +231,25 @@ define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.log2.f64(double)
-declare float @llvm.log2.f32(float)
+declare double @llvm.exp.f64(double)
+declare float @llvm.exp.f32(float)
 
-define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log2_f64
+define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @exp_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @log2_f64
+; SLEEF-SVE-LABEL: define void @exp_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @log2_f64
+; ARMPL-NEON-LABEL: define void @exp_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @log2_f64
+; ARMPL-SVE-LABEL: define void @exp_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -404,7 +258,7 @@ define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.log2.f64(double %in)
+  %call = tail call double @llvm.exp.f64(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -415,22 +269,22 @@ define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log2_f32
+define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @exp_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @log2_f32
+; SLEEF-SVE-LABEL: define void @exp_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @log2_f32
+; ARMPL-NEON-LABEL: define void @exp_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @log2_f32
+; ARMPL-SVE-LABEL: define void @exp_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -439,7 +293,7 @@ define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.log2.f32(float %in)
+  %call = tail call float @llvm.exp.f32(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -450,25 +304,25 @@ define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.log10.f64(double)
-declare float @llvm.log10.f32(float)
+declare double @llvm.exp10.f64(double)
+declare float @llvm.exp10.f32(float)
 
-define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log10_f64
+define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @exp10_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @log10_f64
+; SLEEF-SVE-LABEL: define void @exp10_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @log10_f64
+; ARMPL-NEON-LABEL: define void @exp10_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @log10_f64
+; ARMPL-SVE-LABEL: define void @exp10_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -477,7 +331,7 @@ define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.log10.f64(double %in)
+  %call = tail call double @llvm.exp10.f64(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -488,22 +342,22 @@ define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @log10_f32
+define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @exp10_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @log10_f32
+; SLEEF-SVE-LABEL: define void @exp10_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @log10_f32
+; ARMPL-NEON-LABEL: define void @exp10_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @log10_f32
+; ARMPL-SVE-LABEL: define void @exp10_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -512,7 +366,7 @@ define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.log10.f32(float %in)
+  %call = tail call float @llvm.exp10.f32(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -523,25 +377,25 @@ define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.sin.f64(double)
-declare float @llvm.sin.f32(float)
+declare double @llvm.exp2.f64(double)
+declare float @llvm.exp2.f32(float)
 
-define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @sin_f64
+define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @exp2_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @sin_f64
+; SLEEF-SVE-LABEL: define void @exp2_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @sin_f64
+; ARMPL-NEON-LABEL: define void @exp2_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @sin_f64
+; ARMPL-SVE-LABEL: define void @exp2_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -550,7 +404,7 @@ define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.sin.f64(double %in)
+  %call = tail call double @llvm.exp2.f64(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -561,22 +415,22 @@ define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @sin_f32
+define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @exp2_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @sin_f32
+; SLEEF-SVE-LABEL: define void @exp2_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @sin_f32
+; ARMPL-NEON-LABEL: define void @exp2_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @sin_f32
+; ARMPL-SVE-LABEL: define void @exp2_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -585,7 +439,7 @@ define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.sin.f32(float %in)
+  %call = tail call float @llvm.exp2.f32(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -596,25 +450,25 @@ define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.pow.f64(double, double)
-declare float @llvm.pow.f32(float, float)
+declare double @llvm.fabs.f64(double)
+declare float @llvm.fabs.f32(float)
 
-define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @pow_f64
+define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fabs_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @pow_f64
+; SLEEF-SVE-LABEL: define void @fabs_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @pow_f64
+; ARMPL-NEON-LABEL: define void @fabs_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @pow_f64
+; ARMPL-SVE-LABEL: define void @fabs_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
 ;
   entry:
   br label %for.body
@@ -623,7 +477,7 @@ define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.pow.f64(double %in, double %in)
+  %call = tail call double @llvm.fabs.f64(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -634,22 +488,22 @@ define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @pow_f32
+define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fabs_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @pow_f32
+; SLEEF-SVE-LABEL: define void @fabs_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @pow_f32
+; ARMPL-NEON-LABEL: define void @fabs_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @pow_f32
+; ARMPL-SVE-LABEL: define void @fabs_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
 ;
   entry:
   br label %for.body
@@ -658,7 +512,7 @@ define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.pow.f32(float %in, float %in)
+  %call = tail call float @llvm.fabs.f32(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -669,25 +523,25 @@ define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.ceil.f64(double)
-declare float @llvm.ceil.f32(float)
+declare double @llvm.floor.f64(double)
+declare float @llvm.floor.f32(float)
 
-define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @ceil_f64
+define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @floor_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @ceil_f64
+; SLEEF-SVE-LABEL: define void @floor_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @ceil_f64
+; ARMPL-NEON-LABEL: define void @floor_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @ceil_f64
+; ARMPL-SVE-LABEL: define void @floor_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
 ;
   entry:
   br label %for.body
@@ -696,7 +550,7 @@ define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.ceil.f64(double %in)
+  %call = tail call double @llvm.floor.f64(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -707,22 +561,22 @@ define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @ceil_f32
+define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @floor_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @ceil_f32
+; SLEEF-SVE-LABEL: define void @floor_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @ceil_f32
+; ARMPL-NEON-LABEL: define void @floor_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @ceil_f32
+; ARMPL-SVE-LABEL: define void @floor_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
 ;
   entry:
   br label %for.body
@@ -731,7 +585,7 @@ define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.ceil.f32(float %in)
+  %call = tail call float @llvm.floor.f32(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -742,25 +596,25 @@ define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.copysign.f64(double, double)
-declare float @llvm.copysign.f32(float, float)
+declare double @llvm.fma.f64(double, double, double)
+declare float @llvm.fma.f32(float, float, float)
 
-define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @copysign_f64
+define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fma_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
 ;
-; SLEEF-SVE-LABEL: define void @copysign_f64
+; SLEEF-SVE-LABEL: define void @fma_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 ;
-; ARMPL-NEON-LABEL: define void @copysign_f64
+; ARMPL-NEON-LABEL: define void @fma_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @copysign_f64
+; ARMPL-SVE-LABEL: define void @fma_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
 ;
   entry:
   br label %for.body
@@ -769,7 +623,7 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.copysign.f64(double %in, double %in)
+  %call = tail call double @llvm.fma.f64(double %in, double %in, double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -780,22 +634,22 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @copysign_f32
+define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fma_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
 ;
-; SLEEF-SVE-LABEL: define void @copysign_f32
+; SLEEF-SVE-LABEL: define void @fma_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
 ;
-; ARMPL-NEON-LABEL: define void @copysign_f32
+; ARMPL-NEON-LABEL: define void @fma_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
 ;
-; ARMPL-SVE-LABEL: define void @copysign_f32
+; ARMPL-SVE-LABEL: define void @fma_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
 ;
   entry:
   br label %for.body
@@ -804,7 +658,7 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.copysign.f32(float %in, float %in)
+  %call = tail call float @llvm.fma.f32(float %in, float %in, float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -815,25 +669,25 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.fabs.f64(double)
-declare float @llvm.fabs.f32(float)
+declare double @llvm.log.f64(double)
+declare float @llvm.log.f32(float)
 
-define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fabs_f64
+define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @fabs_f64
+; SLEEF-SVE-LABEL: define void @log_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @fabs_f64
+; ARMPL-NEON-LABEL: define void @log_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @fabs_f64
+; ARMPL-SVE-LABEL: define void @log_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -842,7 +696,7 @@ define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.fabs.f64(double %in)
+  %call = tail call double @llvm.log.f64(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -853,22 +707,22 @@ define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fabs_f32
+define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @fabs_f32
+; SLEEF-SVE-LABEL: define void @log_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @fabs_f32
+; ARMPL-NEON-LABEL: define void @log_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @fabs_f32
+; ARMPL-SVE-LABEL: define void @log_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -877,7 +731,7 @@ define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.fabs.f32(float %in)
+  %call = tail call float @llvm.log.f32(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -888,25 +742,25 @@ define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.floor.f64(double)
-declare float @llvm.floor.f32(float)
+declare double @llvm.log10.f64(double)
+declare float @llvm.log10.f32(float)
 
-define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @floor_f64
+define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log10_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @floor_f64
+; SLEEF-SVE-LABEL: define void @log10_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @floor_f64
+; ARMPL-NEON-LABEL: define void @log10_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @floor_f64
+; ARMPL-SVE-LABEL: define void @log10_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -915,7 +769,7 @@ define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.floor.f64(double %in)
+  %call = tail call double @llvm.log10.f64(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -926,22 +780,22 @@ define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @floor_f32
+define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log10_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @floor_f32
+; SLEEF-SVE-LABEL: define void @log10_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @floor_f32
+; ARMPL-NEON-LABEL: define void @log10_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @floor_f32
+; ARMPL-SVE-LABEL: define void @log10_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -950,7 +804,7 @@ define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.floor.f32(float %in)
+  %call = tail call float @llvm.log10.f32(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -961,25 +815,25 @@ define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-declare double @llvm.fma.f64(double, double, double)
-declare float @llvm.fma.f32(float, float, float)
+declare double @llvm.log2.f64(double)
+declare float @llvm.log2.f32(float)
 
-define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fma_f64
+define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log2_f64
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @fma_f64
+; SLEEF-SVE-LABEL: define void @log2_f64
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @fma_f64
+; ARMPL-NEON-LABEL: define void @log2_f64
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @fma_f64
+; ARMPL-SVE-LABEL: define void @log2_f64
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -988,7 +842,7 @@ define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
   %in = load double, ptr %in.gep, align 8
-  %call = tail call double @llvm.fma.f64(double %in, double %in, double %in)
+  %call = tail call double @llvm.log2.f64(double %in)
   %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
   store double %call, ptr %out.gep, align 8
   %iv.next = add nuw nsw i64 %iv, 1
@@ -999,22 +853,22 @@ define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
-define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) {
-; SLEEF-NEON-LABEL: define void @fma_f32
+define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @log2_f32
 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; SLEEF-SVE-LABEL: define void @fma_f32
+; SLEEF-SVE-LABEL: define void @log2_f32
 ; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
-; ARMPL-NEON-LABEL: define void @fma_f32
+; ARMPL-NEON-LABEL: define void @log2_f32
 ; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
 ;
-; ARMPL-SVE-LABEL: define void @fma_f32
+; ARMPL-SVE-LABEL: define void @log2_f32
 ; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
 ;
   entry:
   br label %for.body
@@ -1023,7 +877,7 @@ define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
   %in = load float, ptr %in.gep, align 8
-  %call = tail call float @llvm.fma.f32(float %in, float %in, float %in)
+  %call = tail call float @llvm.log2.f32(float %in)
   %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
   store float %call, ptr %out.gep, align 4
   %iv.next = add nuw nsw i64 %iv, 1
@@ -1253,6 +1107,79 @@ define void @nearbyint_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
+declare double @llvm.pow.f64(double, double)
+declare float @llvm.pow.f32(float, float)
+
+define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @pow_f64
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; SLEEF-SVE-LABEL: define void @pow_f64
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @pow_f64
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; ARMPL-SVE-LABEL: define void @pow_f64
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
+  %in = load double, ptr %in.gep, align 8
+  %call = tail call double @llvm.pow.f64(double %in, double %in)
+  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
+  store double %call, ptr %out.gep, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @pow_f32
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; SLEEF-SVE-LABEL: define void @pow_f32
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @pow_f32
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; ARMPL-SVE-LABEL: define void @pow_f32
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
+  %in = load float, ptr %in.gep, align 8
+  %call = tail call float @llvm.pow.f32(float %in, float %in)
+  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
+  store float %call, ptr %out.gep, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
 declare double @llvm.rint.f64(double)
 declare float @llvm.rint.f32(float)
 
@@ -1399,6 +1326,79 @@ define void @round_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   ret void
 }
 
+declare double @llvm.sin.f64(double)
+declare float @llvm.sin.f32(float)
+
+define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @sin_f64
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @sin_f64
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @sin_f64
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @sin_f64
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
+  %in = load double, ptr %in.gep, align 8
+  %call = tail call double @llvm.sin.f64(double %in)
+  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
+  store double %call, ptr %out.gep, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
+; SLEEF-NEON-LABEL: define void @sin_f32
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @sin_f32
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SLEEF-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @sin_f32
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @sin_f32
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; ARMPL-SVE:    [[TMP15:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
+  %in = load float, ptr %in.gep, align 8
+  %call = tail call float @llvm.sin.f32(float %in)
+  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
+  store float %call, ptr %out.gep, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
 declare double @llvm.sqrt.f64(double)
 declare float @llvm.sqrt.f32(float)
 
@@ -1544,4 +1544,3 @@ define void @trunc_f32(ptr noalias %in.ptr, ptr %out.ptr) {
   for.end:
   ret void
 }
-