[llvm] Reorganise tests for using SLEEF symbols in LV. (PR #68207)

Wed Oct 4 04:27:35 PDT 2023

https://github.com/JolantaJensen created https://github.com/llvm/llvm-project/pull/68207

The tests introduced by https://reviews.llvm.org/D134719 and later modified in https://reviews.llvm.org/D146839 are not testing LV in isolation. This patch:
  1. Assures that all tests test LV in isolation.
  2. Adds LV tests using llvm intrinsics that have libm mappings.

llrint, llround and lrint are not included as currently IR verifier pass does not allow to use vector types with them.

>From 48e3552eab987570aa7178092d963b619b06fcb5 Mon Sep 17 00:00:00 2001
From: Jolanta Jensen <Jolanta.Jensen at arm.com>
Date: Sat, 12 Aug 2023 19:09:39 +0000
Subject: [PATCH] Reorganise tests for using SLEEF symbols in LV.

The tests introduced by https://reviews.llvm.org/D134719 and later
modified in https://reviews.llvm.org/D146839 are not testing LV in
isolation. This patch:
  1. Assures that all tests test LV in isolation.
  2. Adds LV tests using llvm intrinsics that have libm mappings.

llrint, llround and lrint are not included as currently
IR verifier pass does not allow to use vector types with them.
---
 .../AArch64/sleef-calls-aarch64.ll            |  934 +++++++-----
 .../AArch64/sleef-intrinsic-calls-aarch64.ll  | 1311 +++++++++++++++++
 2 files changed, 1890 insertions(+), 355 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
index d25e24efd5a238e..d7dc122edaf7e2b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
@@ -1,21 +1,26 @@
-; Do NOT use -O3. It will lower exp2 to ldexp, and the test will fail.
-; RUN: opt -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s --check-prefixes=CHECK,NEON
-; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s --check-prefixes=CHECK,SVE
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(_)|(cos|exp[^e]|fmod|gamma|log|pow|sin|sqrt|tan)|(ret)" --version 2
+; RUN: opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SVE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
 
-declare double @acos(double) #0
-declare float @acosf(float) #0
-declare double @llvm.acos.f64(double) #0
-declare float @llvm.acos.f32(float) #0
+declare double @acos(double)
+declare float @acosf(float)
 
 define void @acos_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @acos_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @acos_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @acos(double [[CONV:%.*]]) #[[ATTR0:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @acos_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0:[0-9]+]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @acos(double [[CONV:%.*]]) #[[ATTR2:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -35,11 +40,18 @@ define void @acos_f64(double* nocapture %varray) {
 }
 
 define void @acos_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @acos_f32(
-  ; NEON:    [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[TMP4:%.*]])
-  ; SVE:     [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @acos_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @acosf(float [[CONV:%.*]]) #[[ATTR1:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @acos_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @acosf(float [[CONV:%.*]]) #[[ATTR3:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -58,17 +70,22 @@ define void @acos_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @asin(double) #0
-declare float @asinf(float) #0
-declare double @llvm.asin.f64(double) #0
-declare float @llvm.asin.f32(float) #0
+declare double @asin(double)
+declare float @asinf(float)
 
 define void @asin_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @asin_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @asin_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @asin(double [[CONV:%.*]]) #[[ATTR2:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @asin_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @asin(double [[CONV:%.*]]) #[[ATTR4:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -88,11 +105,18 @@ define void @asin_f64(double* nocapture %varray) {
 }
 
 define void @asin_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @asin_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @asin_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @asinf(float [[CONV:%.*]]) #[[ATTR3:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @asin_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @asinf(float [[CONV:%.*]]) #[[ATTR5:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -111,17 +135,22 @@ define void @asin_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @atan(double) #0
-declare float @atanf(float) #0
-declare double @llvm.atan.f64(double) #0
-declare float @llvm.atan.f32(float) #0
+declare double @atan(double)
+declare float @atanf(float)
 
 define void @atan_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @atan_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @atan_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @atan(double [[CONV:%.*]]) #[[ATTR4:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @atan_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @atan(double [[CONV:%.*]]) #[[ATTR6:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -141,11 +170,18 @@ define void @atan_f64(double* nocapture %varray) {
 }
 
 define void @atan_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @atan_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @atan_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @atanf(float [[CONV:%.*]]) #[[ATTR5:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @atan_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @atanf(float [[CONV:%.*]]) #[[ATTR7:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -164,17 +200,22 @@ define void @atan_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @atan2(double, double) #0
-declare float @atan2f(float, float) #0
-declare double @llvm.atan2.f64(double, double) #0
-declare float @llvm.atan2.f32(float, float) #0
+declare double @atan2(double, double)
+declare float @atan2f(float, float)
 
 define void @atan2_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @atan2_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @atan2_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call double @atan2(double [[CONV:%.*]], double [[CONV]]) #[[ATTR6:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @atan2_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @atan2(double [[CONV:%.*]], double [[CONV]]) #[[ATTR8:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -194,11 +235,18 @@ define void @atan2_f64(double* nocapture %varray) {
 }
 
 define void @atan2_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @atan2_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @atan2_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call float @atan2f(float [[CONV:%.*]], float [[CONV]]) #[[ATTR7:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @atan2_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @atan2f(float [[CONV:%.*]], float [[CONV]]) #[[ATTR9:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -217,17 +265,22 @@ define void @atan2_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @atanh(double) #0
-declare float @atanhf(float) #0
-declare double @llvm.atanh.f64(double) #0
-declare float @llvm.atanh.f32(float) #0
+declare double @atanh(double)
+declare float @atanhf(float)
 
 define void @atanh_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @atanh_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atanh(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atanh(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @atanh_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_atanh(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @atanh(double [[CONV:%.*]]) #[[ATTR8:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @atanh_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atanh(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @atanh(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -247,11 +300,18 @@ define void @atanh_f64(double* nocapture %varray) {
 }
 
 define void @atanh_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @atanh_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanhf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @atanh_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @atanhf(float [[CONV:%.*]]) #[[ATTR9:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @atanh_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanhf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @atanhf(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -270,17 +330,22 @@ define void @atanh_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @cos(double) #0
-declare float @cosf(float) #0
-declare double @llvm.cos.f64(double) #0
-declare float @llvm.cos.f32(float) #0
+declare double @cos(double)
+declare float @cosf(float)
 
 define void @cos_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @cos_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @cos_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @cos(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @cos_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @cos(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -300,11 +365,18 @@ define void @cos_f64(double* nocapture %varray) {
 }
 
 define void @cos_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @cos_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @cos_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @cosf(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @cos_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @cosf(float [[CONV:%.*]]) #[[ATTR13:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -323,17 +395,22 @@ define void @cos_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @cosh(double) #0
-declare float @coshf(float) #0
-declare double @llvm.cosh.f64(double) #0
-declare float @llvm.cosh.f32(float) #0
+declare double @cosh(double)
+declare float @coshf(float)
 
 define void @cosh_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @cosh_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @cosh_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @cosh(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @cosh_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @cosh(double [[CONV:%.*]]) #[[ATTR14:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -353,11 +430,18 @@ define void @cosh_f64(double* nocapture %varray) {
 }
 
 define void @cosh_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @cosh_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @cosh_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @coshf(float [[CONV:%.*]]) #[[ATTR13:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @cosh_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @coshf(float [[CONV:%.*]]) #[[ATTR15:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -376,17 +460,22 @@ define void @cosh_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @exp(double) #0
-declare float @expf(float) #0
-declare double @llvm.exp.f64(double) #0
-declare float @llvm.exp.f32(float) #0
+declare double @exp(double)
+declare float @expf(float)
 
 define void @exp_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @exp_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @exp_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @exp(double [[CONV:%.*]]) #[[ATTR14:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @exp(double [[CONV:%.*]]) #[[ATTR16:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -406,11 +495,18 @@ define void @exp_f64(double* nocapture %varray) {
 }
 
 define void @exp_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @exp_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @exp_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @expf(float [[CONV:%.*]]) #[[ATTR15:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @expf(float [[CONV:%.*]]) #[[ATTR17:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -429,17 +525,22 @@ define void @exp_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @exp2(double) #0
-declare float @exp2f(float) #0
-declare double @llvm.exp2.f64(double) #0
-declare float @llvm.exp2.f32(float) #0
+declare double @exp2(double)
+declare float @exp2f(float)
 
 define void @exp2_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @exp2_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @exp2_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @exp2(double [[CONV:%.*]]) #[[ATTR16:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp2_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @exp2(double [[CONV:%.*]]) #[[ATTR18:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -459,11 +560,18 @@ define void @exp2_f64(double* nocapture %varray) {
 }
 
 define void @exp2_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @exp2_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @exp2_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @exp2f(float [[CONV:%.*]]) #[[ATTR17:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp2_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @exp2f(float [[CONV:%.*]]) #[[ATTR19:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -482,17 +590,22 @@ define void @exp2_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @exp10(double) #0
-declare float @exp10f(float) #0
-declare double @llvm.exp10.f64(double) #0
-declare float @llvm.exp10.f32(float) #0
+declare double @exp10(double)
+declare float @exp10f(float)
 
 define void @exp10_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @exp10_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @exp10_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @exp10(double [[CONV:%.*]]) #[[ATTR18:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp10_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @exp10(double [[CONV:%.*]]) #[[ATTR20:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -512,11 +625,18 @@ define void @exp10_f64(double* nocapture %varray) {
 }
 
 define void @exp10_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @exp10_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @exp10_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @exp10f(float [[CONV:%.*]]) #[[ATTR19:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp10_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @exp10f(float [[CONV:%.*]]) #[[ATTR21:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -535,14 +655,25 @@ define void @exp10_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @fmod(double, double) #0
-declare float @fmodf(float, float) #0
+; There are no TLI mappings to fixed vector functions for fmod and fmodf.
+
+declare double @fmod(double, double)
+declare float @fmodf(float, float)
 
 define void @fmod_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @fmod_f64(
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @fmod_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP3:%.*]] = tail call double @fmod(double [[TMP2:%.*]], double [[TMP2]]) #[[ATTR20:[0-9]+]]
+; NEON:    [[TMP5:%.*]] = tail call double @fmod(double [[TMP4:%.*]], double [[TMP4]]) #[[ATTR20]]
+; NEON:    [[CALL:%.*]] = tail call double @fmod(double [[CONV:%.*]], double [[CONV]]) #[[ATTR20]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @fmod_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @fmod(double [[CONV:%.*]], double [[CONV]]) #[[ATTR22:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -562,10 +693,19 @@ define void @fmod_f64(double* nocapture %varray) {
 }
 
 define void @fmod_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @fmod_f32(
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @fmod_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP3:%.*]] = tail call float @fmodf(float [[TMP2:%.*]], float [[TMP2]]) #[[ATTR21:[0-9]+]]
+; NEON:    [[TMP5:%.*]] = tail call float @fmodf(float [[TMP4:%.*]], float [[TMP4]]) #[[ATTR21]]
+; NEON:    [[CALL:%.*]] = tail call float @fmodf(float [[CONV:%.*]], float [[CONV]]) #[[ATTR21]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @fmod_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @fmodf(float [[CONV:%.*]], float [[CONV]]) #[[ATTR23:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -584,17 +724,22 @@ define void @fmod_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @lgamma(double) #0
-declare float @lgammaf(float) #0
-declare double @llvm.lgamma.f64(double) #0
-declare float @llvm.lgamma.f32(float) #0
+declare double @lgamma(double)
+declare float @lgammaf(float)
 
 define void @lgamma_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @lgamma_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_lgamma(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @lgamma_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @lgamma(double [[CONV:%.*]]) #[[ATTR22:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @lgamma_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_lgamma(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @lgamma(double [[CONV:%.*]]) #[[ATTR24:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -614,11 +759,18 @@ define void @lgamma_f64(double* nocapture %varray) {
 }
 
 define void @lgamma_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @lgamma_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_lgammaf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @lgamma_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @lgammaf(float [[CONV:%.*]]) #[[ATTR23:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @lgamma_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_lgammaf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @lgammaf(float [[CONV:%.*]]) #[[ATTR25:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -637,17 +789,22 @@ define void @lgamma_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @log10(double) #0
-declare float @log10f(float) #0
-declare double @llvm.log10.f64(double) #0
-declare float @llvm.log10.f32(float) #0
+declare double @log10(double)
+declare float @log10f(float)
 
 define void @log10_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @log10_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @log10_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @log10(double [[CONV:%.*]]) #[[ATTR24:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log10_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @log10(double [[CONV:%.*]]) #[[ATTR26:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -667,11 +824,18 @@ define void @log10_f64(double* nocapture %varray) {
 }
 
 define void @log10_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @log10_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @log10_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @log10f(float [[CONV:%.*]]) #[[ATTR25:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log10_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @log10f(float [[CONV:%.*]]) #[[ATTR27:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -690,17 +854,22 @@ define void @log10_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @log2(double) #0
-declare float @log2f(float) #0
-declare double @llvm.log2.f64(double) #0
-declare float @llvm.log2.f32(float) #0
+declare double @log2(double)
+declare float @log2f(float)
 
 define void @log2_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @log2_f64(
-  ; NEON:    [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[TMP4:%.*]])
-  ; SVE:     [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:   ret void
-  ;
+; NEON-LABEL: define void @log2_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @log2(double [[CONV:%.*]]) #[[ATTR26:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log2_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @log2(double [[CONV:%.*]]) #[[ATTR28:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -720,11 +889,18 @@ define void @log2_f64(double* nocapture %varray) {
 }
 
 define void @log2_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @log2_f32(
-  ; NEON:    [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[TMP4:%.*]])
-  ; SVE:     [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:   ret void
-  ;
+; NEON-LABEL: define void @log2_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @log2f(float [[CONV:%.*]]) #[[ATTR27:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log2_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @log2f(float [[CONV:%.*]]) #[[ATTR29:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -743,17 +919,22 @@ define void @log2_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @log(double) #0
-declare float @logf(float) #0
-declare double @llvm.log.f64(double) #0
-declare float @llvm.log.f32(float) #0
+declare double @log(double)
+declare float @logf(float)
 
 define void @log_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @log_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @log_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @log(double [[CONV:%.*]]) #[[ATTR28:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @log(double [[CONV:%.*]]) #[[ATTR30:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -773,11 +954,18 @@ define void @log_f64(double* nocapture %varray) {
 }
 
 define void @log_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @log_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @log_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @logf(float [[CONV:%.*]]) #[[ATTR29:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @logf(float [[CONV:%.*]]) #[[ATTR31:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -796,17 +984,22 @@ define void @log_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @pow(double, double) #0
-declare float @powf(float, float) #0
-declare double @llvm.pow.f64(double, double) #0
-declare float @llvm.pow.f32(float, float) #0
+declare double @pow(double, double)
+declare float @powf(float, float)
 
 define void @pow_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @pow_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @pow_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call double @pow(double [[CONV:%.*]], double [[CONV]]) #[[ATTR30:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @pow_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @pow(double [[CONV:%.*]], double [[CONV]]) #[[ATTR32:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -826,11 +1019,18 @@ define void @pow_f64(double* nocapture %varray) {
 }
 
 define void @pow_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @pow_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @pow_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call float @powf(float [[CONV:%.*]], float [[CONV]]) #[[ATTR31:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @pow_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @powf(float [[CONV:%.*]], float [[CONV]]) #[[ATTR33:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -849,17 +1049,22 @@ define void @pow_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @sin(double) #0
-declare float @sinf(float) #0
-declare double @llvm.sin.f64(double) #0
-declare float @llvm.sin.f32(float) #0
+declare double @sin(double)
+declare float @sinf(float)
 
 define void @sin_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @sin_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @sin_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @sin(double [[CONV:%.*]]) #[[ATTR32:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @sin_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @sin(double [[CONV:%.*]]) #[[ATTR34:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -879,11 +1084,18 @@ define void @sin_f64(double* nocapture %varray) {
 }
 
 define void @sin_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @sin_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @sin_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @sinf(float [[CONV:%.*]]) #[[ATTR33:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @sin_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @sinf(float [[CONV:%.*]]) #[[ATTR35:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -902,17 +1114,22 @@ define void @sin_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @sinh(double) #0
-declare float @sinhf(float) #0
-declare double @llvm.sinh.f64(double) #0
-declare float @llvm.sinh.f32(float) #0
+declare double @sinh(double)
+declare float @sinhf(float)
 
 define void @sinh_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @sinh_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @sinh_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @sinh(double [[CONV:%.*]]) #[[ATTR34:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @sinh_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @sinh(double [[CONV:%.*]]) #[[ATTR36:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -932,11 +1149,18 @@ define void @sinh_f64(double* nocapture %varray) {
 }
 
 define void @sinh_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @sinh_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @sinh_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @sinhf(float [[CONV:%.*]]) #[[ATTR35:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @sinh_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @sinhf(float [[CONV:%.*]]) #[[ATTR37:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -955,17 +1179,22 @@ define void @sinh_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @sqrt(double) #0
-declare float @sqrtf(float) #0
-declare double @llvm.sqrt.f64(double) #0
-declare float @llvm.sqrt.f32(float) #0
+declare double @sqrt(double)
+declare float @sqrtf(float)
 
 define void @sqrt_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @sqrt_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sqrt(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @sqrt_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @sqrt(double [[CONV:%.*]]) #[[ATTR36:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @sqrt_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sqrt(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @sqrt(double [[CONV:%.*]]) #[[ATTR38:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -985,11 +1214,18 @@ define void @sqrt_f64(double* nocapture %varray) {
 }
 
 define void @sqrt_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @sqrt_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sqrtf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @sqrt_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @sqrtf(float [[CONV:%.*]]) #[[ATTR37:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @sqrt_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sqrtf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @sqrtf(float [[CONV:%.*]]) #[[ATTR39:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -1008,65 +1244,22 @@ define void @sqrt_f32(float* nocapture %varray) {
   ret void
 }
 
-define void @llvm_sqrt_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @llvm_sqrt_f64(
-  ; NEON:     [[TMP5:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call fast <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[TMP4:%.*]])
-  ; CHECK:    ret void
-  ;
-  entry:
-  br label %for.body
-
-  for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %tmp = trunc i64 %iv to i32
-  %conv = sitofp i32 %tmp to double
-  %call = tail call fast double @llvm.sqrt.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 8
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-  for.end:
-  ret void
-}
-
-define void @llvm_sqrt_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @llvm_sqrt_f32(
-  ; NEON:     [[TMP5:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call fast <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[TMP4:%.*]])
-  ; CHECK:    ret void
-  ;
-  entry:
-  br label %for.body
-
-  for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %tmp = trunc i64 %iv to i32
-  %conv = sitofp i32 %tmp to float
-  %call = tail call fast float @llvm.sqrt.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-  for.end:
-  ret void
-}
-
-declare double @tan(double) #0
-declare float @tanf(float) #0
-declare double @llvm.tan.f64(double) #0
-declare float @llvm.tan.f32(float) #0
+declare double @tan(double)
+declare float @tanf(float)
 
 define void @tan_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @tan_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @tan_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @tan(double [[CONV:%.*]]) #[[ATTR38:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @tan_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @tan(double [[CONV:%.*]]) #[[ATTR40:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -1086,11 +1279,18 @@ define void @tan_f64(double* nocapture %varray) {
 }
 
 define void @tan_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @tan_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @tan_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @tanf(float [[CONV:%.*]]) #[[ATTR39:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @tan_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @tanf(float [[CONV:%.*]]) #[[ATTR41:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -1109,17 +1309,22 @@ define void @tan_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @tanh(double) #0
-declare float @tanhf(float) #0
-declare double @llvm.tanh.f64(double) #0
-declare float @llvm.tanh.f32(float) #0
+declare double @tanh(double)
+declare float @tanhf(float)
 
 define void @tanh_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @tanh_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @tanh_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @tanh(double [[CONV:%.*]]) #[[ATTR40:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @tanh_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @tanh(double [[CONV:%.*]]) #[[ATTR42:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -1139,11 +1344,18 @@ define void @tanh_f64(double* nocapture %varray) {
 }
 
 define void @tanh_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @tanh_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @tanh_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @tanhf(float [[CONV:%.*]]) #[[ATTR41:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @tanh_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @tanhf(float [[CONV:%.*]]) #[[ATTR43:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -1162,17 +1374,22 @@ define void @tanh_f32(float* nocapture %varray) {
   ret void
 }
 
-declare double @tgamma(double) #0
-declare float @tgammaf(float) #0
-declare double @llvm.tgamma.f64(double) #0
-declare float @llvm.tgamma.f32(float) #0
+declare double @tgamma(double)
+declare float @tgammaf(float)
 
 define void @tgamma_f64(double* nocapture %varray) {
-  ; CHECK-LABEL: @tgamma_f64(
-  ; NEON:     [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tgamma(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @tgamma_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @tgamma(double [[CONV:%.*]]) #[[ATTR42:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @tgamma_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tgamma(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @tgamma(double [[CONV:%.*]]) #[[ATTR44:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
@@ -1192,11 +1409,18 @@ define void @tgamma_f64(double* nocapture %varray) {
 }
 
 define void @tgamma_f32(float* nocapture %varray) {
-  ; CHECK-LABEL: @tgamma_f32(
-  ; NEON:     [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[TMP4:%.*]])
-  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tgammaf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-  ; CHECK:    ret void
-  ;
+; NEON-LABEL: define void @tgamma_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @tgammaf(float [[CONV:%.*]]) #[[ATTR43:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @tgamma_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tgammaf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @tgammaf(float [[CONV:%.*]]) #[[ATTR45:[0-9]+]]
+; SVE:    ret void
+;
   entry:
   br label %for.body
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
new file mode 100644
index 000000000000000..715c2c352b7762b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
@@ -0,0 +1,1311 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(\.|_)(ceil|copysign|cos|exp[^e]|exp2|fabs|floor|fma|log|m..num|pow|.*int|round|sin|sqrt|trunc)|(ret)" --version 2
+; RUN: opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=NEON
+; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SVE
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+
+; Tests are checking if LV can vectorize loops with llvm math intrinsics using mappings
+; from TLI (if such mappings exist) for scalable and fixed width vectors.
+
+declare double @llvm.ceil.f64(double)
+declare float @llvm.ceil.f32(float)
+
+define void @llvm_ceil_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_ceil_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.ceil.f64(double [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_ceil_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1:[0-9]+]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.ceil.f64(double [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.ceil.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_ceil_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_ceil_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.ceil.f32(float [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_ceil_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.ceil.f32(float [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.ceil.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.copysign.f64(double, double)
+declare float @llvm.copysign.f32(float, float)
+
+define void @llvm_copysign_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_copysign_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.copysign.f64(double [[CONV:%.*]], double [[CONV]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_copysign_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.copysign.f64(double [[CONV:%.*]], double [[CONV]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.copysign.f64(double %conv, double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_copysign_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_copysign_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.copysign.f32(float [[CONV:%.*]], float [[CONV]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_copysign_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.copysign.f32(float [[CONV:%.*]], float [[CONV]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.copysign.f32(float %conv, float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.cos.f64(double)
+declare float @llvm.cos.f32(float)
+
+define void @llvm_cos_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_cos_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV:%.*]]) #[[ATTR1:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_cos_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV:%.*]]) #[[ATTR4:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.cos.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_cos_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_cos_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[CONV:%.*]]) #[[ATTR2:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_cos_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[CONV:%.*]]) #[[ATTR5:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.cos.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.exp.f64(double)
+declare float @llvm.exp.f32(float)
+
+define void @llvm_exp_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_exp_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[CONV:%.*]]) #[[ATTR3:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_exp_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[CONV:%.*]]) #[[ATTR6:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.exp.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_exp_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_exp_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[CONV:%.*]]) #[[ATTR4:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_exp_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[CONV:%.*]]) #[[ATTR7:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.exp.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.exp2.f64(double)
+declare float @llvm.exp2.f32(float)
+
+define void @llvm_exp2_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_exp2_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[CONV:%.*]]) #[[ATTR5:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_exp2_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[CONV:%.*]]) #[[ATTR8:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.exp2.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_exp2_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_exp2_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[CONV:%.*]]) #[[ATTR6:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_exp2_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[CONV:%.*]]) #[[ATTR9:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.exp2.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.fabs.f64(double)
+declare float @llvm.fabs.f32(float)
+
+define void @llvm_fabs_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_fabs_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.fabs.f64(double [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_fabs_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.fabs.f64(double [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.fabs.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+
+define void @llvm_fabs_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_fabs_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.fabs.f32(float [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_fabs_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.fabs.f32(float [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.fabs.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.floor.f64(double)
+declare float @llvm.floor.f32(float)
+
+define void @llvm_floor_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_floor_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.floor.f64(double [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_floor_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.floor.f64(double [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.floor.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_floor_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_floor_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.floor.f32(float [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_floor_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.floor.f32(float [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.floor.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.fma.f64(double, double, double)
+declare float @llvm.fma.f32(float, float, float)
+
+define void @llvm_fma_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_fma_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]], <2 x double> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.fma.f64(double [[CONV:%.*]], double [[CONV]], double [[CONV]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_fma_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x double> [[TMP11]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.fma.f64(double [[CONV:%.*]], double [[CONV]], double [[CONV]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.fma.f64(double %conv, double %conv, double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_fma_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_fma_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]], <4 x float> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.fma.f32(float [[CONV:%.*]], float [[CONV]], float [[CONV]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_fma_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]], <vscale x 4 x float> [[TMP11]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.fma.f32(float [[CONV:%.*]], float [[CONV]], float [[CONV]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.fma.f32(float %conv, float %conv, float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.log.f64(double)
+declare float @llvm.log.f32(float)
+
+define void @llvm_log_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_log_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR7:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_log_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.log.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_log_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_log_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR8:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_log_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.log.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.log10.f64(double)
+declare float @llvm.log10.f32(float)
+
+define void @llvm_log10_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_log10_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR9:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_log10_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.log10.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_log10_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_log10_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_log10_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[CONV:%.*]]) #[[ATTR13:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.log10.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.log2.f64(double)
+declare float @llvm.log2.f32(float)
+
+define void @llvm_log2_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_log2_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_log2_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[CONV:%.*]]) #[[ATTR14:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.log2.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_log2_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_log2_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[CONV:%.*]]) #[[ATTR12:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_log2_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[CONV:%.*]]) #[[ATTR15:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.log2.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.maxnum.f64(double, double)
+declare float @llvm.maxnum.f32(float, float)
+
+define void @llvm_maxnum_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_maxnum_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.maxnum.f64(double [[CONV:%.*]], double [[CONV]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_maxnum_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.maxnum.f64(double [[CONV:%.*]], double [[CONV]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.maxnum.f64(double %conv, double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_maxnum_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_maxnum_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.maxnum.f32(float [[CONV:%.*]], float [[CONV]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_maxnum_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.maxnum.f32(float [[CONV:%.*]], float [[CONV]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.maxnum.f32(float %conv, float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.minnum.f64(double, double)
+declare float @llvm.minnum.f32(float, float)
+
+define void @llvm_minnum_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_minnum_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.minnum.f64(double [[CONV:%.*]], double [[CONV]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_minnum_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.minnum.f64(double [[CONV:%.*]], double [[CONV]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.minnum.f64(double %conv, double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_minnum_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_minnum_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.minnum.f32(float [[CONV:%.*]], float [[CONV]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_minnum_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.minnum.f32(float [[CONV:%.*]], float [[CONV]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.minnum.f32(float %conv, float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.nearbyint.f64(double)
+declare float @llvm.nearbyint.f32(float)
+
+define void @llvm_nearbyint_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_nearbyint_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.nearbyint.f64(double [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_nearbyint_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.nearbyint.f64(double [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.nearbyint.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_nearbyint_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_nearbyint_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.nearbyint.f32(float [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_nearbyint_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.nearbyint.f32(float [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.nearbyint.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.pow.f64(double, double)
+declare float @llvm.pow.f32(float, float)
+
+define void @llvm_pow_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_pow_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP1:%.*]], <2 x double> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.pow.f64(double [[CONV:%.*]], double [[CONV]]) #[[ATTR13:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_pow_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @llvm.pow.f64(double [[CONV:%.*]], double [[CONV]]) #[[ATTR16:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.pow.f64(double %conv, double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_pow_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_pow_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP1:%.*]], <4 x float> [[TMP1]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.pow.f32(float [[CONV:%.*]], float [[CONV]]) #[[ATTR14:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_pow_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x float> [[TMP11]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @llvm.pow.f32(float [[CONV:%.*]], float [[CONV]]) #[[ATTR17:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.pow.f32(float %conv, float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.rint.f64(double)
+declare float @llvm.rint.f32(float)
+
+define void @llvm_rint_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_rint_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.rint.f64(double [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_rint_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.rint.f64(double [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.rint.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_rint_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_rint_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.rint.f32(float [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_rint_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.rint.f32(float [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.rint.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.round.f64(double)
+declare float @llvm.round.f32(float)
+
+define void @llvm_round_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_round_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.round.f64(double [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_round_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.round.f64(double [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.round.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_round_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_round_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.round.f32(float [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_round_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.round.f32(float [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.round.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.sin.f64(double)
+declare float @llvm.sin.f32(float)
+
+define void @llvm_sin_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_sin_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV:%.*]]) #[[ATTR15:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_sin_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV:%.*]]) #[[ATTR18:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.sin.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_sin_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_sin_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[CONV:%.*]]) #[[ATTR16:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_sin_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE:    [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[CONV:%.*]]) #[[ATTR19:[0-9]+]]
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.sin.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.sqrt.f64(double)
+declare float @llvm.sqrt.f32(float)
+
+define void @llvm_sqrt_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_sqrt_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.sqrt.f64(double [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_sqrt_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.sqrt.f64(double [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.sqrt.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_sqrt_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_sqrt_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.sqrt.f32(float [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_sqrt_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.sqrt.f32(float [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.sqrt.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.trunc.f64(double)
+declare float @llvm.trunc.f32(float)
+
+define void @llvm_trunc_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_trunc_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.trunc.f64(double [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_trunc_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.trunc.f64(double [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.trunc.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_trunc_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_trunc_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON:    [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.trunc.f32(float [[CONV:%.*]])
+; NEON:    ret void
+;
+; SVE-LABEL: define void @llvm_trunc_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP12:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[TMP11:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.trunc.f32(float [[CONV:%.*]])
+; SVE:    ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.trunc.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}