[llvm] [TLI][AArch64] Add extra SLEEF mappings and tests (PR #78140)
Maciej Gabka via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 15 02:16:06 PST 2024
https://github.com/mgabka created https://github.com/llvm/llvm-project/pull/78140
This patch is adding more scalar to vector mappings to TLI for SLEEF vector library.
The added mappings are for the following functions:
acosh, asinh, cbrt, copysign, cospi
erf, erfc, expm1, fdim, fma, fmax, fmin
hypot, ilogb, ldexp, log1p, nextafter, sinpi
>From 32ea5ba9518fb2d7079c878c43408e2b7e62846c Mon Sep 17 00:00:00 2001
From: Maciej Gabka <maciej.gabka at arm.com>
Date: Wed, 10 Jan 2024 11:45:06 +0000
Subject: [PATCH] [TLI][AArch64] Add extra SLEEF mappings and tests
This patch is adding more scalar to vector mappings
to TLI for SLEEF vector library.
The mappings are for following functions:
acosh, asinh, cbrt, copysign, cospi
erf, erfc, expm1, fdim, fma, fmax, fmin
hypot, ilogb, ldexp, log1p, nextafter, sinpi
---
llvm/include/llvm/Analysis/VecFuncs.def | 126 +++++
.../AArch64/veclib-function-calls.ll | 481 ++++++++++++++++--
2 files changed, 551 insertions(+), 56 deletions(-)
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index b22bdd555cd416..d785f040fc2d27 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -472,19 +472,33 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16), "_ZGV_LLVM_N16
TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("acosh", "_ZGVnN2v_acosh", FIXED(2), "_ZGV_LLVM_N2v")
+
TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("asinh", "_ZGVnN2v_asinh", FIXED(2), "_ZGV_LLVM_N2v")
+
TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("cbrt", "_ZGVnN2v_cbrt", FIXED(2), "_ZGV_LLVM_N2v")
+
+TLI_DEFINE_VECFUNC("copysign", "_ZGVnN2vv_copysign", FIXED(2), "_ZGV_LLVM_N2vv")
+
TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("cospi", "_ZGVnN2v_cospi", FIXED(2), "_ZGV_LLVM_N2v")
+
+TLI_DEFINE_VECFUNC("erf", "_ZGVnN2v_erf", FIXED(2), "_ZGV_LLVM_N2v")
+
+TLI_DEFINE_VECFUNC("erfc", "_ZGVnN2v_erfc", FIXED(2), "_ZGV_LLVM_N2v")
+
TLI_DEFINE_VECFUNC("exp", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v")
@@ -494,8 +508,24 @@ TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v"
TLI_DEFINE_VECFUNC("exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("expm1", "_ZGVnN2v_expm1", FIXED(2), "_ZGV_LLVM_N2v")
+
+TLI_DEFINE_VECFUNC("fdim", "_ZGVnN2vv_fdim", FIXED(2), "_ZGV_LLVM_N2vv")
+
+TLI_DEFINE_VECFUNC("fma", "_ZGVnN2vvv_fma", FIXED(2), "_ZGV_LLVM_N2vvv")
+
+TLI_DEFINE_VECFUNC("fmax", "_ZGVnN2vv_fmax", FIXED(2), "_ZGV_LLVM_N2vv")
+
+TLI_DEFINE_VECFUNC("fmin", "_ZGVnN2vv_fmin", FIXED(2), "_ZGV_LLVM_N2vv")
+
TLI_DEFINE_VECFUNC("fmod", "_ZGVnN2vv_fmod", FIXED(2), "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("hypot", "_ZGVnN2vv_hypot", FIXED(2), "_ZGV_LLVM_N2vv")
+
+TLI_DEFINE_VECFUNC("ilogb", "_ZGVnN2v_ilogb", FIXED(2), "_ZGV_LLVM_N2v")
+
+TLI_DEFINE_VECFUNC("ldexp", "_ZGVnN2vv_ldexp", FIXED(2), "_ZGV_LLVM_N2vv")
+
TLI_DEFINE_VECFUNC("lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("log", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v")
@@ -504,11 +534,15 @@ TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("log1p", "_ZGVnN2v_log1p", FIXED(2), "_ZGV_LLVM_N2v")
+
TLI_DEFINE_VECFUNC("log2", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8")
+TLI_DEFINE_VECFUNC("nextafter", "_ZGVnN2vv_nextafter", FIXED(2), "_ZGV_LLVM_N2vv")
+
TLI_DEFINE_VECFUNC("pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
@@ -521,6 +555,8 @@ TLI_DEFINE_VECFUNC("sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2v
TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("sinpi", "_ZGVnN2v_sinpi", FIXED(2), "_ZGV_LLVM_N2v")
+
TLI_DEFINE_VECFUNC("sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
@@ -533,19 +569,33 @@ TLI_DEFINE_VECFUNC("tgamma", "_ZGVnN2v_tgamma", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("acosf", "_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN4v_acoshf", FIXED(4), "_ZGV_LLVM_N4v")
+
TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN4v_asinhf", FIXED(4), "_ZGV_LLVM_N4v")
+
TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN4v_cbrtf", FIXED(4), "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("copysignf", "_ZGVnN4vv_copysignf", FIXED(4), "_ZGV_LLVM_N4vv")
+
TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("coshf", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("cospif", "_ZGVnN2v_cospif", FIXED(4), "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("erff", "_ZGVnN2v_erff", FIXED(4), "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN2v_erfcf", FIXED(4), "_ZGV_LLVM_N4v")
+
TLI_DEFINE_VECFUNC("expf", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -555,8 +605,24 @@ TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v
TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN4v_expm1f", FIXED(4), "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("fdimf", "_ZGVnN4vv_fdimf", FIXED(4), "_ZGV_LLVM_N4vv")
+
+TLI_DEFINE_VECFUNC("fmaf", "_ZGVnN4vvv_fmaf", FIXED(4), "_ZGV_LLVM_N4vvv")
+
+TLI_DEFINE_VECFUNC("fmaxf", "_ZGVnN4vv_fmaxf", FIXED(4), "_ZGV_LLVM_N4vv")
+
+TLI_DEFINE_VECFUNC("fminf", "_ZGVnN4vv_fminf", FIXED(4), "_ZGV_LLVM_N4vv")
+
TLI_DEFINE_VECFUNC("fmodf", "_ZGVnN4vv_fmodf", FIXED(4), "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN4vv_hypotf", FIXED(4), "_ZGV_LLVM_N4vv")
+
+TLI_DEFINE_VECFUNC("ilogbf", "_ZGVnN4v_ilogbf", FIXED(4), "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("ldexpf", "_ZGVnN4vv_ldexpf", FIXED(4), "_ZGV_LLVM_N4vv")
+
TLI_DEFINE_VECFUNC("lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("logf", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -565,11 +631,15 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN4v_log1pf", FIXED(4), "_ZGV_LLVM_N4v")
+
TLI_DEFINE_VECFUNC("log2f", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4")
+TLI_DEFINE_VECFUNC("nextafterf", "_ZGVnN4vv_nextafterf", FIXED(4), "_ZGV_LLVM_N4vv")
+
TLI_DEFINE_VECFUNC("powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
@@ -582,6 +652,8 @@ TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N
TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("sinpif", "_ZGVnN4v_sinpif", FIXED(4), "_ZGV_LLVM_N4v")
+
TLI_DEFINE_VECFUNC("sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -595,9 +667,15 @@ TLI_DEFINE_VECFUNC("tgammaf", "_ZGVnN4v_tgammaf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("acos", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("acosf", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("acosh", "_ZGVsMxv_acosh", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("acoshf", "_ZGVsMxv_acoshf", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("asinh", "_ZGVsMxv_asinh", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("asinhf", "_ZGVsMxv_asinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("atan", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("atanf", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -607,6 +685,12 @@ TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv
TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("cbrt", "_ZGVsMxv_cbrt", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("cbrtf", "_ZGVsMxv_cbrtf", SCALABLE(4), MASKED, "_ZGVsMxv")
+
+TLI_DEFINE_VECFUNC("copysign", "_ZGVsMxvv_copysign", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("copysignf", "_ZGVsMxvv_copysignf", SCALABLE(4), MASKED, "_ZGVsMxvv")
+
TLI_DEFINE_VECFUNC("cos", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("cosf", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv")
@@ -615,6 +699,15 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsM
TLI_DEFINE_VECFUNC("cosh", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("coshf", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("cospi", "_ZGVsMxv_cospi", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("cospif", "_ZGVsMxv_cospif", SCALABLE(4), MASKED, "_ZGVsMxv")
+
+TLI_DEFINE_VECFUNC("erf", "_ZGVsMxv_erf", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("erff", "_ZGVsMxv_erff", SCALABLE(4), MASKED, "_ZGVsMxv")
+
+TLI_DEFINE_VECFUNC("erfc", "_ZGVsMxv_erfc", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("erfcf", "_ZGVsMxv_erfcf", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("exp", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv")
@@ -630,9 +723,33 @@ TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("expm1", "_ZGVsMxv_expm1", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("expm1f", "_ZGVsMxv_expm1f", SCALABLE(4), MASKED, "_ZGVsMxv")
+
+TLI_DEFINE_VECFUNC("fdim", "_ZGVsMxvv_fdim", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("fdimf", "_ZGVsMxvv_fdimf", SCALABLE(4), MASKED, "_ZGVsMxvv")
+
+TLI_DEFINE_VECFUNC("fma", "_ZGVsMxvvv_fma", SCALABLE(2), MASKED, "_ZGVsMxvvv")
+TLI_DEFINE_VECFUNC("fmaf", "_ZGVsMxvvv_fmaf", SCALABLE(4), MASKED, "_ZGVsMxvvv")
+
+TLI_DEFINE_VECFUNC("fmax", "_ZGVsMxvv_fmax", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("fmaxf", "_ZGVsMxvv_fmaxf", SCALABLE(4), MASKED, "_ZGVsMxvv")
+
+TLI_DEFINE_VECFUNC("fmin", "_ZGVsMxvv_fmin", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("fminf", "_ZGVsMxvv_fminf", SCALABLE(4), MASKED, "_ZGVsMxvv")
+
TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("hypot", "_ZGVsMxvv_hypot", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("hypotf", "_ZGVsMxvv_hypotf", SCALABLE(4), MASKED, "_ZGVsMxvv")
+
+TLI_DEFINE_VECFUNC("ilogb", "_ZGVsMxv_ilogb", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("ilogbf", "_ZGVsMxv_ilogbf", SCALABLE(4), MASKED, "_ZGVsMxv")
+
+TLI_DEFINE_VECFUNC("ldexp", "_ZGVsMxvv_ldexp", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("ldexpf", "_ZGVsMxvv_ldexpf", SCALABLE(4), MASKED, "_ZGVsMxvv")
+
TLI_DEFINE_VECFUNC("lgamma", "_ZGVsMxv_lgamma", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("lgammaf", "_ZGVsMxv_lgammaf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -646,6 +763,9 @@ TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("log1p", "_ZGVsMxv_log1p", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("log1pf", "_ZGVsMxv_log1pf", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv")
@@ -654,6 +774,9 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGV
TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8")
TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4")
+TLI_DEFINE_VECFUNC("nextafter", "_ZGVsMxvv_nextafter", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("nextafterf", "_ZGVsMxvv_nextafterf", SCALABLE(4), MASKED, "_ZGVsMxvv")
+
TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv")
@@ -673,6 +796,9 @@ TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, "
TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("sinpi", "_ZGVsMxv_sinpi", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("sinpif", "_ZGVsMxv_sinpif", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("sqrt", "_ZGVsMxv_sqrt", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("sqrtf", "_ZGVsMxv_sqrtf", SCALABLE(4), MASKED, "_ZGVsMxv")
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
index 2eb7151ffe31a7..dd9a329c0d8891 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll
@@ -90,11 +90,11 @@ declare float @acoshf(float)
define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @acosh_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @acosh(double [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_acosh(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @acosh_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @acosh(double [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acosh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @acosh_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -125,11 +125,11 @@ define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @acosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @acosh_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @acoshf(float [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_acoshf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @acosh_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @acoshf(float [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acoshf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @acosh_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -236,11 +236,11 @@ declare float @asinhf(float)
define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @asinh_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @asinh(double [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_asinh(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @asinh_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @asinh(double [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asinh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @asinh_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -271,11 +271,11 @@ define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @asinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @asinh_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @asinhf(float [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_asinhf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @asinh_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @asinhf(float [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @asinh_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -528,11 +528,11 @@ declare float @cbrtf(float)
define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @cbrt_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @cbrt(double [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cbrt(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @cbrt_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @cbrt(double [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cbrt(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @cbrt_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -563,11 +563,11 @@ define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @cbrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @cbrt_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @cbrtf(float [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_cbrtf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @cbrt_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @cbrtf(float [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cbrtf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @cbrt_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -601,11 +601,11 @@ declare float @copysignf(float, float)
define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @copysign_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_copysign(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @copysign_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_copysign(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @copysign_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -636,11 +636,11 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @copysign_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_copysignf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @copysign_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_copysignf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @copysign_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -814,17 +814,90 @@ define void @cosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
ret void
}
+declare double @cospi(double)
+declare float @cospif(float)
+
+define void @cospi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @cospi_f64
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cospi(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @cospi_f64
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cospi(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @cospi_f64
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @cospi_f64
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]])
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
+ %in = load double, ptr %in.gep, align 8
+ %call = tail call double @cospi(double %in)
+ %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
+ store double %call, ptr %out.gep, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @cospi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @cospi_f32
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN2v_cospif(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @cospi_f32
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cospif(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @cospi_f32
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @cospi_f32
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]])
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
+ %in = load float, ptr %in.gep, align 8
+ %call = tail call float @cospif(float %in)
+ %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
+ store float %call, ptr %out.gep, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
declare double @erf(double)
declare float @erff(float)
define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @erf_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @erf(double [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_erf(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @erf_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @erf(double [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_erf(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @erf_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -855,11 +928,11 @@ define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @erf_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @erf_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @erff(float [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN2v_erff(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @erf_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @erff(float [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_erff(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @erf_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -893,11 +966,11 @@ declare float @erfcf(float)
define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @erfc_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @erfc(double [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_erfc(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @erfc_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @erfc(double [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_erfc(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @erfc_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -928,11 +1001,11 @@ define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @erfc_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @erfc_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @erfcf(float [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN2v_erfcf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @erfc_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @erfcf(float [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_erfcf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @erfc_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1185,11 +1258,11 @@ declare float @expm1f(float)
define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @expm1_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @expm1(double [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_expm1(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @expm1_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @expm1(double [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_expm1(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @expm1_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1220,11 +1293,11 @@ define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @expm1_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @expm1_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @expm1f(float [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_expm1f(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @expm1_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @expm1f(float [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expm1f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @expm1_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1258,11 +1331,11 @@ declare float @fdimf(float, float)
define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @fdim_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_fdim(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @fdim_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fdim(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @fdim_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1293,11 +1366,11 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @fdim_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_fdimf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @fdim_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fdimf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @fdim_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1331,11 +1404,11 @@ declare float @fmaf(float, float, float)
define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @fma_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vvv_fma(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @fma_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvvv_fma(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @fma_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1366,11 +1439,11 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @fma_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vvv_fmaf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @fma_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvvv_fmaf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @fma_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1398,17 +1471,90 @@ define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
ret void
}
+declare double @fmax(double, double)
+declare float @fmaxf(float, float)
+
+define void @fmax_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fmax_f64
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_fmax(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
+;
+; SLEEF-SVE-LABEL: define void @fmax_f64
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmax(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @fmax_f64
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]])
+;
+; ARMPL-SVE-LABEL: define void @fmax_f64
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]])
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
+ %in = load double, ptr %in.gep, align 8
+ %call = tail call double @fmax(double %in, double %in)
+ %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
+ store double %call, ptr %out.gep, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @fmax_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @fmax_f32
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_fmaxf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
+;
+; SLEEF-SVE-LABEL: define void @fmax_f32
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmaxf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @fmax_f32
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]])
+;
+; ARMPL-SVE-LABEL: define void @fmax_f32
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]])
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
+ %in = load float, ptr %in.gep, align 8
+ %call = tail call float @fmaxf(float %in, float %in)
+ %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
+ store float %call, ptr %out.gep, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
declare double @fmin(double, double)
declare float @fminf(float, float)
define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @fmin_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_fmin(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @fmin_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @fmin_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1439,11 +1585,11 @@ define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @fmin_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_fminf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @fmin_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fminf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @fmin_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1550,11 +1696,11 @@ declare float @hypotf(float, float)
define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @hypot_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_hypot(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @hypot_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @hypot(double [[IN:%.*]], double [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_hypot(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @hypot_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1585,11 +1731,11 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @hypot_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_hypotf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @hypot_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_hypotf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @hypot_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1617,6 +1763,156 @@ define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
ret void
}
+declare i32 @ilogb(double)
+declare i32 @ilogbf(float)
+
+define void @ilogb_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @ilogb_f64
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x i32> @_ZGVnN2v_ilogb(<2 x double> [[WIDE_LOAD:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @ilogb_f64
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x i32> @_ZGVsMxv_ilogb(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @ilogb_f64
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @ilogb_f64
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]])
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
+ %in = load double, ptr %in.gep, align 8
+ %call = tail call i32 @ilogb(double %in)
+ %out.gep = getelementptr inbounds i32, ptr %out.ptr, i64 %iv
+ store i32 %call, ptr %out.gep, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @ilogb_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @ilogb_f32
+; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x i32> @_ZGVnN4v_ilogbf(<4 x float> [[WIDE_LOAD:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @ilogb_f32
+; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x i32> @_ZGVsMxv_ilogbf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @ilogb_f32
+; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @ilogb_f32
+; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]])
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
+ %in = load float, ptr %in.gep, align 8
+ %call = tail call i32 @ilogbf(float %in)
+ %out.gep = getelementptr inbounds i32, ptr %out.ptr, i64 %iv
+ store i32 %call, ptr %out.gep, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+declare double @ldexp(double, i32)
+declare float @ldexpf(float, i32)
+
+define void @ldexp_f64(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @ldexp_f64
+; SLEEF-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_ldexp(<2 x double> [[WIDE_LOAD:%.*]], <2 x i32> [[WIDE_LOAD1:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @ldexp_f64
+; SLEEF-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: [[TMP17:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_ldexp(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i32> [[WIDE_MASKED_LOAD1:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @ldexp_f64
+; ARMPL-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @ldexp_f64
+; ARMPL-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]])
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in1.gep = getelementptr inbounds double, ptr %in1.ptr, i64 %iv
+ %in1 = load double, ptr %in1.gep, align 8
+ %in2.gep = getelementptr inbounds i32, ptr %in2.ptr, i64 %iv
+ %in2 = load i32, ptr %in2.gep, align 8
+ %call = tail call double @ldexp(double %in1, i32 %in2)
+ %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
+ store double %call, ptr %out.gep, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @ldexp_f32(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %out.ptr) {
+; SLEEF-NEON-LABEL: define void @ldexp_f32
+; SLEEF-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_ldexpf(<4 x float> [[WIDE_LOAD:%.*]], <4 x i32> [[WIDE_LOAD1:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @ldexp_f32
+; SLEEF-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: [[TMP17:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_ldexpf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD1:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @ldexp_f32
+; ARMPL-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @ldexp_f32
+; ARMPL-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]])
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in1.gep = getelementptr inbounds float, ptr %in1.ptr, i64 %iv
+ %in1 = load float, ptr %in1.gep, align 8
+ %in2.gep = getelementptr inbounds i32, ptr %in2.ptr, i64 %iv
+ %in2 = load i32, ptr %in2.gep, align 8
+ %call = tail call float @ldexpf(float %in1, i32 %in2)
+ %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
+ store float %call, ptr %out.gep, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
declare double @lgamma(double)
declare float @lgammaf(float)
@@ -1842,11 +2138,11 @@ declare float @log1pf(float)
define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @log1p_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log1p(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @log1p_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log1p(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @log1p_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -1877,11 +2173,11 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @log1p_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log1pf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @log1p_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log1pf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @log1p_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -2063,11 +2359,11 @@ declare float @nextafterf(float, float)
define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @nextafter_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_nextafter(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @nextafter_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_nextafter(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @nextafter_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -2098,11 +2394,11 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @nextafter_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_nextafterf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @nextafter_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_nextafterf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @nextafter_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -2349,6 +2645,79 @@ for.cond.cleanup:
ret void
}
+declare void @sincospi(double, ptr, ptr)
+declare void @sincospif(float, ptr, ptr)
+
+define void @test_sincospi(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; SLEEF-NEON-LABEL: define void @test_sincospi
+; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincospi(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @test_sincospi
+; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: call void @_ZGVsMxvl8l8_sincospi(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @test_sincospi
+; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: call void @armpl_vsincospiq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @test_sincospi
+; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: call void @armpl_svsincospi_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepa = getelementptr double, ptr %a, i64 %indvars.iv
+ %num = load double, ptr %gepa, align 8
+ %gepb = getelementptr double, ptr %b, i64 %indvars.iv
+ %gepc = getelementptr double, ptr %c, i64 %indvars.iv
+ call void @sincospi(double %num, ptr %gepb, ptr %gepc)
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_sincospif(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; SLEEF-NEON-LABEL: define void @test_sincospif
+; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincospif(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+;
+; SLEEF-SVE-LABEL: define void @test_sincospif
+; SLEEF-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: call void @_ZGVsMxvl4l4_sincospif(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+; ARMPL-NEON-LABEL: define void @test_sincospif
+; ARMPL-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: call void @armpl_vsincospiq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+;
+; ARMPL-SVE-LABEL: define void @test_sincospif
+; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: call void @armpl_svsincospi_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepa = getelementptr float, ptr %a, i64 %indvars.iv
+ %num = load float, ptr %gepa, align 8
+ %gepb = getelementptr float, ptr %b, i64 %indvars.iv
+ %gepc = getelementptr float, ptr %c, i64 %indvars.iv
+ call void @sincospif(float %num, ptr %gepb, ptr %gepc)
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
declare double @sinh(double)
declare float @sinhf(float)
@@ -2428,11 +2797,11 @@ declare float @sinpif(float)
define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @sinpi_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sinpi(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @sinpi_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinpi(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @sinpi_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
@@ -2463,11 +2832,11 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
; SLEEF-NEON-LABEL: define void @sinpi_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-NEON: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]])
+; SLEEF-NEON: [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinpif(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @sinpi_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
-; SLEEF-SVE: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]])
+; SLEEF-SVE: [[TMP15:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinpif(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @sinpi_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] {
More information about the llvm-commits
mailing list