[clang] c4fa504 - [AArch64] Enable libm vectorized functions via SLEEF
Daniel Kiss via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 20 09:52:47 PST 2023
Author: Daniel Kiss
Date: 2023-01-20T18:52:38+01:00
New Revision: c4fa504f797f68297c252dc91a24c7d37c1de4df
URL: https://github.com/llvm/llvm-project/commit/c4fa504f797f68297c252dc91a24c7d37c1de4df
DIFF: https://github.com/llvm/llvm-project/commit/c4fa504f797f68297c252dc91a24c7d37c1de4df.diff
LOG: [AArch64] Enable libm vectorized functions via SLEEF
This enables vectorization of libm math functions via SLEEF: http://sleef.org/.
- A new vectorization library enum value is added to TargetLibraryInfo.h: SLEEFGNUABI.
- A new option value is added to TargetLibraryInfoImpl - ClVectorLibrary: sleefgnuabi.
- A comprehensive test case is included in this changeset.
- A new vectorization library argument is added to -fveclib: -fveclib=SLEEF.
Math functions that are vectorized by SLEEF:
acos
asin
atan
atan2
atanh
cos
cosh
exp
exp2
exp10
lgamma
log10
log2
log
pow
sin
sinh
sqrt
tan
tanh
tgamma
Co-authored-by: Stefan Teleman
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D134719
Added:
llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
Modified:
clang/include/clang/Basic/CodeGenOptions.h
clang/include/clang/Driver/Options.td
clang/lib/CodeGen/BackendUtil.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/Driver/autocomplete.c
clang/test/Driver/fveclib.c
llvm/include/llvm/Analysis/TargetLibraryInfo.h
llvm/include/llvm/Analysis/VecFuncs.def
llvm/lib/Analysis/TargetLibraryInfo.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 4cc0d05d177b3..4175fe3072ab8 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -60,6 +60,7 @@ class CodeGenOptions : public CodeGenOptionsBase {
LIBMVEC, // GLIBC vector math library.
MASSV, // IBM MASS vector library.
SVML, // Intel short vector math library.
+ SLEEF, // SLEEF SIMD Library for Evaluating Elementary Functions.
Darwin_libsystem_m // Use Darwin's libsytem_m vector functions.
};
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index ec7b5bfa1554a..343cc77a18c43 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2473,9 +2473,9 @@ def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group<f_clang_
Alias<fno_global_isel>;
def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Use the given vector functions library">,
- Values<"Accelerate,libmvec,MASSV,SVML,Darwin_libsystem_m,none">,
+ Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,none">,
NormalizedValuesScope<"CodeGenOptions">,
- NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "SVML",
+ NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "SVML", "SLEEF",
"Darwin_libsystem_m", "NoLibrary"]>,
MarshallingInfoEnum<CodeGenOpts<"VecLib">, "NoLibrary">;
def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group<f_Group>,
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 937a8dc40667e..ecc727d6dd281 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -271,27 +271,28 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
switch (CodeGenOpts.getVecLib()) {
case CodeGenOptions::Accelerate:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate);
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate,
+ TargetTriple);
break;
case CodeGenOptions::LIBMVEC:
- switch(TargetTriple.getArch()) {
- default:
- break;
- case llvm::Triple::x86_64:
- TLII->addVectorizableFunctionsFromVecLib
- (TargetLibraryInfoImpl::LIBMVEC_X86);
- break;
- }
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC_X86,
+ TargetTriple);
break;
case CodeGenOptions::MASSV:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV);
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV,
+ TargetTriple);
break;
case CodeGenOptions::SVML:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML);
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML,
+ TargetTriple);
+ break;
+ case CodeGenOptions::SLEEF:
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI,
+ TargetTriple);
break;
case CodeGenOptions::Darwin_libsystem_m:
TLII->addVectorizableFunctionsFromVecLib(
- TargetLibraryInfoImpl::DarwinLibSystemM);
+ TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple);
break;
default:
break;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 9821c386c87d6..f766eeda3cc4a 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5131,7 +5131,26 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
}
}
- Args.AddLastArg(CmdArgs, options::OPT_fveclib);
+ if (Arg *A = Args.getLastArg(options::OPT_fveclib)) {
+ StringRef Name = A->getValue();
+ if (Name == "SVML") {
+ if (Triple.getArch() != llvm::Triple::x86 &&
+ Triple.getArch() != llvm::Triple::x86_64)
+ D.Diag(diag::err_drv_unsupported_opt_for_target)
+ << Name << Triple.getArchName();
+ } else if (Name == "LIBMVEC-X86") {
+ if (Triple.getArch() != llvm::Triple::x86 &&
+ Triple.getArch() != llvm::Triple::x86_64)
+ D.Diag(diag::err_drv_unsupported_opt_for_target)
+ << Name << Triple.getArchName();
+ } else if (Name == "SLEEF") {
+ if (Triple.getArch() != llvm::Triple::aarch64 &&
+ Triple.getArch() != llvm::Triple::aarch64_be)
+ D.Diag(diag::err_drv_unsupported_opt_for_target)
+ << Name << Triple.getArchName();
+ }
+ A->render(Args, CmdArgs);
+ }
if (Args.hasFlag(options::OPT_fmerge_all_constants,
options::OPT_fno_merge_all_constants, false))
diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c
index 502eee107d0b8..ea647e3c7ef0f 100644
--- a/clang/test/Driver/autocomplete.c
+++ b/clang/test/Driver/autocomplete.c
@@ -84,6 +84,7 @@
// FVECLIBALL-NEXT: libmvec
// FVECLIBALL-NEXT: MASSV
// FVECLIBALL-NEXT: none
+// FVECLIBALL-NEXT: SLEEF
// FVECLIBALL-NEXT: SVML
// RUN: %clang --autocomplete=-fshow-overloads= | FileCheck %s -check-prefix=FSOVERALL
// FSOVERALL: all
diff --git a/clang/test/Driver/fveclib.c b/clang/test/Driver/fveclib.c
index 2bf7558a02af8..d6049763dd112 100644
--- a/clang/test/Driver/fveclib.c
+++ b/clang/test/Driver/fveclib.c
@@ -3,6 +3,7 @@
// RUN: %clang -### -c -fveclib=libmvec %s 2>&1 | FileCheck -check-prefix CHECK-libmvec %s
// RUN: %clang -### -c -fveclib=MASSV %s 2>&1 | FileCheck -check-prefix CHECK-MASSV %s
// RUN: %clang -### -c -fveclib=Darwin_libsystem_m %s 2>&1 | FileCheck -check-prefix CHECK-DARWIN_LIBSYSTEM_M %s
+// RUN: %clang -### -c --target=aarch64-none-none -fveclib=SLEEF %s 2>&1 | FileCheck -check-prefix CHECK-SLEEF %s
// RUN: not %clang -c -fveclib=something %s 2>&1 | FileCheck -check-prefix CHECK-INVALID %s
// CHECK-NOLIB: "-fveclib=none"
@@ -10,9 +11,15 @@
// CHECK-libmvec: "-fveclib=libmvec"
// CHECK-MASSV: "-fveclib=MASSV"
// CHECK-DARWIN_LIBSYSTEM_M: "-fveclib=Darwin_libsystem_m"
+// CHECK-SLEEF: "-fveclib=SLEEF"
// CHECK-INVALID: error: invalid value 'something' in '-fveclib=something'
+// RUN: not %clang --target=x86-none-none -c -fveclib=SLEEF %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
+// RUN: not %clang --target=aarch64-none-none -c -fveclib=LIBMVEC-X86 %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
+// RUN: not %clang --target=aarch64-none-none -c -fveclib=SVML %s 2>&1 | FileCheck -check-prefix CHECK-ERROR %s
+// CHECK-ERROR: unsupported option {{.*}} for target
+
// RUN: %clang -fveclib=Accelerate %s -target arm64-apple-ios8.0.0 -### 2>&1 | FileCheck --check-prefix=CHECK-LINK %s
// CHECK-LINK: "-framework" "Accelerate"
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index f4a48262328ee..8fcfbdbd6665c 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -94,7 +94,8 @@ class TargetLibraryInfoImpl {
DarwinLibSystemM, // Use Darwin's libsystem_m.
LIBMVEC_X86, // GLIBC Vector Math library.
MASSV, // IBM MASS vector library.
- SVML // Intel short vector math library.
+ SVML, // Intel short vector math library.
+ SLEEFGNUABI // SLEEF - SIMD Library for Evaluating Elementary Functions.
};
TargetLibraryInfoImpl();
@@ -154,7 +155,8 @@ class TargetLibraryInfoImpl {
/// Calls addVectorizableFunctions with a known preset of functions for the
/// given vector library.
- void addVectorizableFunctionsFromVecLib(enum VectorLibrary VecLib);
+ void addVectorizableFunctionsFromVecLib(enum VectorLibrary VecLib,
+ const llvm::Triple &TargetTriple);
/// Return true if the function F has a vector equivalent with vectorization
/// factor VF.
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 8a1ebec4c7277..85d208b946252 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -466,6 +466,146 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f4", FIXED(4))
TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f8", FIXED(8))
TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16))
+#elif defined(TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS)
+
+TLI_DEFINE_VECFUNC( "acos", "_ZGVnN2v_acos", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.acos.f64", "_ZGVnN2v_acos", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "asin", "_ZGVnN2v_asin", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.asin.f64", "_ZGVnN2v_asin", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "atan", "_ZGVnN2v_atan", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.atan.f64", "_ZGVnN2v_atan", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "atan2", "_ZGVnN2vv_atan2", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.atan2.f64", "_ZGVnN2vv_atan2", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.atan2.v2f64", "_ZGVnN2vv_atan2", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "atanh", "_ZGVnN2v_atanh", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.atanh.f64", "_ZGVnN2v_atanh", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "cos", "_ZGVnN2v_cos", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "cosh", "_ZGVnN2v_cosh", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.cosh.f64", "_ZGVnN2v_cosh", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "exp", "_ZGVnN2v_exp", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.exp.v2f64", "_ZGVnN2v_exp", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "exp2", "_ZGVnN2v_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.exp2.v2f64", "_ZGVnN2v_exp2", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "exp10", "_ZGVnN2v_exp10", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.exp10.v2f64", "_ZGVnN2v_exp10", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "lgamma", "_ZGVnN2v_lgamma", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.lgamma.f64", "_ZGVnN2v_lgamma", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "log", "_ZGVnN2v_log", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.log.f64", "_ZGVnN2v_log", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "log2", "_ZGVnN2v_log2", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "log10", "_ZGVnN2v_log10", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "pow", "_ZGVnN2vv_pow", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.pow.v2f64", "_ZGVnN2vv_pow", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "sin", "_ZGVnN2v_sin", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.sinh.f64", "_ZGVnN2v_sinh", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.sqrt.f64", "_ZGVnN2v_sqrt", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "tan", "_ZGVnN2v_tan", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.tan.f64", "_ZGVnN2v_tan", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "tanh", "_ZGVnN2v_tanh", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.tanh.f64", "_ZGVnN2v_tanh", FIXED(2))
+
+TLI_DEFINE_VECFUNC( "tgamma", "_ZGVnN2v_tgamma", FIXED(2))
+TLI_DEFINE_VECFUNC( "llvm.tgamma.f64", "_ZGVnN2v_tgamma", FIXED(2))
+
+#elif defined(TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS)
+
+TLI_DEFINE_VECFUNC( "acosf", "_ZGVnN4v_acosf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.acos.f32", "_ZGVnN4v_acosf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "asinf", "_ZGVnN4v_asinf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.asin.f32", "_ZGVnN4v_asinf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "atanf", "_ZGVnN4v_atanf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.atan.f32", "_ZGVnN4v_atanf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "atan2f", "_ZGVnN4vv_atan2f", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.atan2.f32", "_ZGVnN4vv_atan2f", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.atan2.v4f32", "_ZGVnN4vv_atan2f", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "atanhf", "_ZGVnN4v_atanhf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.atanh.f32", "_ZGVnN4v_atanhf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "cosf", "_ZGVnN4v_cosf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "coshf", "_ZGVnN4v_coshf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.cosh.f32", "_ZGVnN4v_coshf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "expf", "_ZGVnN4v_expf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.exp.v4f32", "_ZGVnN4v_expf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "exp2f", "_ZGVnN4v_exp2f", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.exp2.v4f32", "_ZGVnN4v_exp2f", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "exp10f", "_ZGVnN4v_exp10f", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.exp10.v4f32", "_ZGVnN4v_exp10f", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "lgammaf", "_ZGVnN4v_lgammaf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.lgamma.f32", "_ZGVnN4v_lgammaf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "logf", "_ZGVnN4v_logf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.log.f32", "_ZGVnN4v_logf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "log2f", "_ZGVnN4v_log2f", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "log10f", "_ZGVnN4v_log10f", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "powf", "_ZGVnN4vv_powf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.pow.v4f32", "_ZGVnN4vv_powf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "sinf", "_ZGVnN4v_sinf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.sinh.f32", "_ZGVnN4v_sinhf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.sqrt.f32", "_ZGVnN4v_sqrtf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "tanf", "_ZGVnN4v_tanf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.tan.f32", "_ZGVnN4v_tanf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "tanhf", "_ZGVnN4v_tanhf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.tanh.f32", "_ZGVnN4v_tanhf", FIXED(4))
+
+TLI_DEFINE_VECFUNC( "tgammaf", "_ZGVnN4v_tgammaf", FIXED(4))
+TLI_DEFINE_VECFUNC( "llvm.tgamma.f32", "_ZGVnN4v_tgammaf", FIXED(4))
+
#else
#error "Must choose which vector library functions are to be defined."
#endif
@@ -476,4 +616,6 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16))
#undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS
#undef TLI_DEFINE_MASSV_VECFUNCS
#undef TLI_DEFINE_SVML_VECFUNCS
+#undef TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS
+#undef TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS
#undef TLI_DEFINE_MASSV_VECFUNCS_NAMES
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 4c85bcca0a72a..31cc0e7ec30ea 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -31,7 +31,9 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
"IBM MASS vector library"),
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
- "Intel SVML library")));
+ "Intel SVML library"),
+ clEnumValN(TargetLibraryInfoImpl::SLEEFGNUABI, "sleefgnuabi",
+ "SIMD Library for Evaluating Elementary Functions")));
StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
{
@@ -852,7 +854,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_vec_free);
}
- TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
+ TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary, T);
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
@@ -1134,7 +1136,7 @@ void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
}
void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
- enum VectorLibrary VecLib) {
+ enum VectorLibrary VecLib, const llvm::Triple &TargetTriple) {
switch (VecLib) {
case Accelerate: {
const VecDesc VecFuncs[] = {
@@ -1176,6 +1178,27 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
addVectorizableFunctions(VecFuncs);
break;
}
+ case SLEEFGNUABI: {
+ const VecDesc VecFuncs_VF2[] = {
+#define TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS
+#include "llvm/Analysis/VecFuncs.def"
+ };
+ const VecDesc VecFuncs_VF4[] = {
+#define TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS
+#include "llvm/Analysis/VecFuncs.def"
+ };
+
+ switch (TargetTriple.getArch()) {
+ default:
+ break;
+ case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_be:
+ addVectorizableFunctions(VecFuncs_VF2);
+ addVectorizableFunctions(VecFuncs_VF4);
+ break;
+ }
+ break;
+ }
case NoLibrary:
break;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
new file mode 100644
index 0000000000000..623ac986fcace
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
@@ -0,0 +1,1076 @@
+; Do NOT use -O3. It will lower exp2 to ldexp, and the test will fail.
+; RUN: opt -vector-library=sleefgnuabi -replace-with-veclib -loop-unroll -loop-vectorize -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+declare double @acos(double) #0
+declare float @acosf(float) #0
+declare double @llvm.acos.f64(double) #0
+declare float @llvm.acos.f32(float) #0
+
+define void @acos_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @acos_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @acos(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @acos_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @acos_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @acosf(float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+declare double @asin(double) #0
+declare float @asinf(float) #0
+declare double @llvm.asin.f64(double) #0
+declare float @llvm.asin.f32(float) #0
+
+define void @asin_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @asin_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @asin(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @asin_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @asin_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @asinf(float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+declare double @atan(double) #0
+declare float @atanf(float) #0
+declare double @llvm.atan.f64(double) #0
+declare float @llvm.atan.f32(float) #0
+
+define void @atan_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @atan_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @atan(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @atan_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @atan_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @atanf(float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+declare double @atan2(double, double) #0
+declare float @atan2f(float, float) #0
+declare double @llvm.atan2.f64(double, double) #0
+declare float @llvm.atan2.f32(float, float) #0
+
+define void @atan2_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @atan2_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @atan2(double %conv, double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @atan2_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @atan2_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @atan2f(float %conv, float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+declare double @atanh(double) #0
+declare float @atanhf(float) #0
+declare double @llvm.atanh.f64(double) #0
+declare float @llvm.atanh.f32(float) #0
+
+define void @atanh_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @atanh_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atanh(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @atanh(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @atanh_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @atanh_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @atanhf(float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+declare double @cos(double) #0
+declare float @cosf(float) #0
+declare double @llvm.cos.f64(double) #0
+declare float @llvm.cos.f32(float) #0
+
+define void @cos_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @cos_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @cos(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @cos_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @cos_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @cosf(float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+declare double @cosh(double) #0
+declare float @coshf(float) #0
+declare double @llvm.cosh.f64(double) #0
+declare float @llvm.cosh.f32(float) #0
+
+define void @cosh_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @cosh_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @cosh(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @cosh_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @cosh_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @coshf(float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+declare double @exp(double) #0
+declare float @expf(float) #0
+declare double @llvm.exp.f64(double) #0
+declare float @llvm.exp.f32(float) #0
+
+define void @exp_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @exp_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @exp(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @exp_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @exp_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @expf(float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+declare double @exp2(double) #0
+declare float @exp2f(float) #0
+declare double @llvm.exp2.f64(double) #0
+declare float @llvm.exp2.f32(float) #0
+
+define void @exp2_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @exp2_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @exp2(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+; Scalar loop that stores exp2f(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_exp2f.
+define void @exp2_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @exp2_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @exp2f(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the exp10 family: the libm-style entry points (@exp10,
+; @exp10f) and the llvm.exp10.* names, in f64 and f32 flavours. All four
+; reference attribute group #0, whose definition is not part of this excerpt.
+; The two test functions directly below call only @exp10 and @exp10f.
+declare double @exp10(double) #0
+declare float @exp10f(float) #0
+declare double @llvm.exp10.f64(double) #0
+declare float @llvm.exp10.f32(float) #0
+
+; Scalar loop that stores exp10(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to double. The check lines
+; expect the output to contain a <2 x double> call to @_ZGVnN2v_exp10.
+define void @exp10_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @exp10_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @exp10(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores exp10f(x) into varray[i] for i = 0..999, where x is
+; the counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_exp10f.
+define void @exp10_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @exp10_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @exp10f(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the lgamma family: the libm-style entry points (@lgamma,
+; @lgammaf) and the llvm.lgamma.* names, in f64 and f32 flavours. All four
+; reference attribute group #0, whose definition is not part of this excerpt.
+; The two test functions directly below call only @lgamma and @lgammaf.
+declare double @lgamma(double) #0
+declare float @lgammaf(float) #0
+declare double @llvm.lgamma.f64(double) #0
+declare float @llvm.lgamma.f32(float) #0
+
+; Scalar loop that stores lgamma(x) into varray[i] for i = 0..999, where x is
+; the counter truncated to i32 and converted (signed) to double. The check
+; lines expect the output to contain a <2 x double> call to @_ZGVnN2v_lgamma.
+define void @lgamma_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @lgamma_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @lgamma(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores lgammaf(x) into varray[i] for i = 0..999, where x is
+; the counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_lgammaf.
+define void @lgamma_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @lgamma_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @lgammaf(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the log10 family: the libm-style entry points (@log10,
+; @log10f) and the llvm.log10.* names, in f64 and f32 flavours. All four
+; reference attribute group #0, whose definition is not part of this excerpt.
+; The two test functions directly below call only @log10 and @log10f.
+declare double @log10(double) #0
+declare float @log10f(float) #0
+declare double @llvm.log10.f64(double) #0
+declare float @llvm.log10.f32(float) #0
+
+; Scalar loop that stores log10(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to double. The check lines
+; expect the output to contain a <2 x double> call to @_ZGVnN2v_log10.
+define void @log10_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @log10_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @log10(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores log10f(x) into varray[i] for i = 0..999, where x is
+; the counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_log10f.
+define void @log10_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @log10_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @log10f(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the log2 family: the libm-style entry points (@log2, @log2f)
+; and the llvm.log2.* names, in f64 and f32 flavours. All four reference
+; attribute group #0, whose definition is not part of this excerpt. The two
+; test functions directly below call only @log2 and @log2f.
+declare double @log2(double) #0
+declare float @log2f(float) #0
+declare double @llvm.log2.f64(double) #0
+declare float @llvm.log2.f32(float) #0
+
+; Scalar loop that stores log2(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to double. The check lines
+; expect the output to contain a <2 x double> call to @_ZGVnN2v_log2.
+define void @log2_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @log2_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @log2(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores log2f(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_log2f.
+define void @log2_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @log2_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @log2f(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the log family: the libm-style entry points (@log, @logf)
+; and the llvm.log.* names, in f64 and f32 flavours. All four reference
+; attribute group #0, whose definition is not part of this excerpt. The two
+; test functions directly below call only @log and @logf.
+declare double @log(double) #0
+declare float @logf(float) #0
+declare double @llvm.log.f64(double) #0
+declare float @llvm.log.f32(float) #0
+
+; Scalar loop that stores log(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to double. The check lines
+; expect the output to contain a <2 x double> call to @_ZGVnN2v_log.
+define void @log_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @log_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @log(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores logf(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_logf.
+define void @log_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @log_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @logf(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the two-operand pow family: the libm-style entry points
+; (@pow, @powf) and the llvm.pow.* names, in f64 and f32 flavours. All four
+; reference attribute group #0, whose definition is not part of this excerpt.
+; The two test functions directly below call only @pow and @powf.
+declare double @pow(double, double) #0
+declare float @powf(float, float) #0
+declare double @llvm.pow.f64(double, double) #0
+declare float @llvm.pow.f32(float, float) #0
+
+; Scalar loop that stores pow(x, x) into varray[i] for i = 0..999, where x is
+; the counter truncated to i32 and converted (signed) to double. The check
+; lines expect the output to contain a <2 x double> call to @_ZGVnN2vv_pow.
+; Both scalar operands are the same value (%conv), so the pattern captures the
+; first vector operand and requires the second operand to be that same value,
+; rather than re-capturing it with a wildcard that would match anything.
+define void @pow_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @pow_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ ; %iv is the loop counter; %conv feeds both operands of the call.
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @pow(double %conv, double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+; Scalar loop that stores powf(x, x) into varray[i] for i = 0..999, where x is
+; the counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4vv_powf.
+; Both scalar operands are the same value (%conv), so the pattern captures the
+; first vector operand and requires the second operand to be that same value,
+; rather than re-capturing it with a wildcard that would match anything.
+define void @pow_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @pow_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]])
+ ; CHECK: ret void
+ ;
+ entry:
+ br label %for.body
+
+ for.body:
+ ; %iv is the loop counter; %conv feeds both operands of the call.
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @powf(float %conv, float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+; Declarations for the sin family: the libm-style entry points (@sin, @sinf)
+; and the llvm.sin.* names, in f64 and f32 flavours. All four reference
+; attribute group #0, whose definition is not part of this excerpt. The two
+; test functions directly below call only @sin and @sinf.
+declare double @sin(double) #0
+declare float @sinf(float) #0
+declare double @llvm.sin.f64(double) #0
+declare float @llvm.sin.f32(float) #0
+
+; Scalar loop that stores sin(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to double. The check lines
+; expect the output to contain a <2 x double> call to @_ZGVnN2v_sin.
+define void @sin_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @sin_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @sin(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores sinf(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_sinf.
+define void @sin_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @sin_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @sinf(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the sinh family: the libm-style entry points (@sinh, @sinhf)
+; and the llvm.sinh.* names, in f64 and f32 flavours. All four reference
+; attribute group #0, whose definition is not part of this excerpt. The two
+; test functions directly below call only @sinh and @sinhf.
+declare double @sinh(double) #0
+declare float @sinhf(float) #0
+declare double @llvm.sinh.f64(double) #0
+declare float @llvm.sinh.f32(float) #0
+
+; Scalar loop that stores sinh(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to double. The check lines
+; expect the output to contain a <2 x double> call to @_ZGVnN2v_sinh.
+define void @sinh_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @sinh_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @sinh(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores sinhf(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_sinhf.
+define void @sinh_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @sinh_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @sinhf(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the sqrt family: the libm-style entry points (@sqrt, @sqrtf)
+; and the llvm.sqrt.* names, in f64 and f32 flavours. All four reference
+; attribute group #0, whose definition is not part of this excerpt. The two
+; test functions directly below call only @sqrt and @sqrtf.
+declare double @sqrt(double) #0
+declare float @sqrtf(float) #0
+declare double @llvm.sqrt.f64(double) #0
+declare float @llvm.sqrt.f32(float) #0
+
+; Scalar loop that stores sqrt(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to double. The check lines
+; expect the output to contain a <2 x double> call to @_ZGVnN2v_sqrt.
+define void @sqrt_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @sqrt_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @sqrt(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores sqrtf(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_sqrtf.
+define void @sqrt_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @sqrt_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @sqrtf(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the tan family: the libm-style entry points (@tan, @tanf)
+; and the llvm.tan.* names, in f64 and f32 flavours. All four reference
+; attribute group #0, whose definition is not part of this excerpt. The two
+; test functions directly below call only @tan and @tanf.
+declare double @tan(double) #0
+declare float @tanf(float) #0
+declare double @llvm.tan.f64(double) #0
+declare float @llvm.tan.f32(float) #0
+
+; Scalar loop that stores tan(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to double. The check lines
+; expect the output to contain a <2 x double> call to @_ZGVnN2v_tan.
+define void @tan_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @tan_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @tan(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores tanf(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_tanf.
+define void @tan_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @tan_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @tanf(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the tanh family: the libm-style entry points (@tanh, @tanhf)
+; and the llvm.tanh.* names, in f64 and f32 flavours. All four reference
+; attribute group #0, whose definition is not part of this excerpt. The two
+; test functions directly below call only @tanh and @tanhf.
+declare double @tanh(double) #0
+declare float @tanhf(float) #0
+declare double @llvm.tanh.f64(double) #0
+declare float @llvm.tanh.f32(float) #0
+
+; Scalar loop that stores tanh(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to double. The check lines
+; expect the output to contain a <2 x double> call to @_ZGVnN2v_tanh.
+define void @tanh_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @tanh_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @tanh(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores tanhf(x) into varray[i] for i = 0..999, where x is the
+; counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_tanhf.
+define void @tanh_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @tanh_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @tanhf(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Declarations for the tgamma family: the libm-style entry points (@tgamma,
+; @tgammaf) and the llvm.tgamma.* names, in f64 and f32 flavours. All four
+; reference attribute group #0, whose definition is not part of this excerpt.
+; The two test functions directly below call only @tgamma and @tgammaf.
+declare double @tgamma(double) #0
+declare float @tgammaf(float) #0
+declare double @llvm.tgamma.f64(double) #0
+declare float @llvm.tgamma.f32(float) #0
+
+; Scalar loop that stores tgamma(x) into varray[i] for i = 0..999, where x is
+; the counter truncated to i32 and converted (signed) to double. The check
+; lines expect the output to contain a <2 x double> call to @_ZGVnN2v_tgamma.
+define void @tgamma_f64(double* nocapture %varray) {
+ ; CHECK-LABEL: @tgamma_f64(
+ ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to double
+ %res = tail call double @tgamma(double %arg)
+ %slot = getelementptr inbounds double, double* %varray, i64 %i
+ store double %res, double* %slot, align 8
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
+
+; Scalar loop that stores tgammaf(x) into varray[i] for i = 0..999, where x is
+; the counter truncated to i32 and converted (signed) to float. The check lines
+; expect the output to contain a <4 x float> call to @_ZGVnN4v_tgammaf.
+define void @tgamma_f32(float* nocapture %varray) {
+ ; CHECK-LABEL: @tgamma_f32(
+ ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[TMP4:%.*]])
+ ; CHECK: ret void
+ ;
+ start:
+ br label %loop
+
+ loop:
+ ; %i is the loop counter; the call argument is derived from it.
+ %i = phi i64 [ 0, %start ], [ %i.inc, %loop ]
+ %i.i32 = trunc i64 %i to i32
+ %arg = sitofp i32 %i.i32 to float
+ %res = tail call float @tgammaf(float %arg)
+ %slot = getelementptr inbounds float, float* %varray, i64 %i
+ store float %res, float* %slot, align 4
+ %i.inc = add nuw nsw i64 %i, 1
+ %done = icmp eq i64 %i.inc, 1000
+ br i1 %done, label %exit, label %loop
+
+ exit:
+ ret void
+}
More information about the cfe-commits
mailing list