[clang] [llvm] Adding support of AMDLIBM vector library (PR #78560)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 18 02:35:38 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-driver
Author: Rohit Aggarwal (rohitaggarwal007)
<details>
<summary>Changes</summary>
Hi,
AMD has its own implementation of vector calls. This patch includes the changes to enable use of AMD's math library via -fveclib=AMDLIBM.
---
Patch is 60.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78560.diff
11 Files Affected:
- (modified) clang/include/clang/Driver/Options.td (+2-2)
- (modified) clang/test/Driver/autocomplete.c (+1)
- (modified) llvm/include/llvm/Analysis/TargetLibraryInfo.h (+2-1)
- (modified) llvm/include/llvm/Frontend/Driver/CodeGenOptions.h (+2-1)
- (modified) llvm/lib/Analysis/TargetLibraryInfo.cpp (+210-1)
- (modified) llvm/lib/Frontend/Driver/CodeGenOptions.cpp (+4)
- (modified) llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll (+11)
- (added) llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll (+332)
- (added) llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll (+747)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll (+28-1)
- (modified) llvm/test/Transforms/Util/add-TLI-mappings.ll (+23)
``````````diff
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index e4fdad8265c863..2fbe1f49a79aab 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3190,10 +3190,10 @@ def fno_experimental_isel : Flag<["-"], "fno-experimental-isel">, Group<f_clang_
def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>,
Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Use the given vector functions library">,
- Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,ArmPL,none">,
+ Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,ArmPL,AMDLIBM,none">,
NormalizedValuesScope<"llvm::driver::VectorLibrary">,
NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "SVML", "SLEEF",
- "Darwin_libsystem_m", "ArmPL", "NoLibrary"]>,
+ "Darwin_libsystem_m", "ArmPL", "AMDLIBM", "NoLibrary"]>,
MarshallingInfoEnum<CodeGenOpts<"VecLib">, "NoLibrary">;
def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group<f_Group>,
Alias<flax_vector_conversions_EQ>, AliasArgs<["none"]>;
diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c
index d6f57708b67eb6..c8ceaaf404672f 100644
--- a/clang/test/Driver/autocomplete.c
+++ b/clang/test/Driver/autocomplete.c
@@ -80,6 +80,7 @@
// FLTOALL-NEXT: thin
// RUN: %clang --autocomplete=-fveclib= | FileCheck %s -check-prefix=FVECLIBALL
// FVECLIBALL: Accelerate
+// FVECLIBALL-NEXT: AMDLIBM
// FVECLIBALL-NEXT: ArmPL
// FVECLIBALL-NEXT: Darwin_libsystem_m
// FVECLIBALL-NEXT: libmvec
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index daf1d8e2079f85..4a3edb8f02a7a8 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -129,7 +129,8 @@ class TargetLibraryInfoImpl {
MASSV, // IBM MASS vector library.
SVML, // Intel short vector math library.
SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions.
- ArmPL // Arm Performance Libraries.
+ ArmPL, // Arm Performance Libraries.
+ AMDLIBM
};
TargetLibraryInfoImpl();
diff --git a/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h b/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h
index 0b1d924a26b2de..0180670c4c6991 100644
--- a/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h
+++ b/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h
@@ -29,7 +29,8 @@ enum class VectorLibrary {
SVML, // Intel short vector math library.
SLEEF, // SLEEF SIMD Library for Evaluating Elementary Functions.
Darwin_libsystem_m, // Use Darwin's libsystem_m vector functions.
- ArmPL // Arm Performance Libraries.
+ ArmPL, // Arm Performance Libraries.
+ AMDLIBM // AMD vector math library.
};
TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 58749e559040a7..16afc33bf7ce88 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -37,7 +37,9 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
clEnumValN(TargetLibraryInfoImpl::SLEEFGNUABI, "sleefgnuabi",
"SIMD Library for Evaluating Elementary Functions"),
clEnumValN(TargetLibraryInfoImpl::ArmPL, "ArmPL",
- "Arm Performance Libraries")));
+ "Arm Performance Libraries"),
+ clEnumValN(TargetLibraryInfoImpl::AMDLIBM, "AMDLIBM",
+ "AMD vector math library")));
StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
{
@@ -1279,6 +1281,213 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
}
break;
}
+ case AMDLIBM: {
+#define FIXED(NL) ElementCount::getFixed(NL)
+ const VecDesc VecFuncs[] = {
+ {"sinf", "amd_vrs16_sinf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"sinf", "amd_vrs8_sinf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"sinf", "amd_vrs4_sinf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"sin", "amd_vrd8_sin", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"sin", "amd_vrd4_sin", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"sin", "amd_vrd2_sin", FIXED(2), false, "_ZGV_LLVM_N2v"},
+
+ {"llvm.sin.f32", "amd_vrs16_sinf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"llvm.sin.f32", "amd_vrs8_sinf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"llvm.sin.f32", "amd_vrs4_sinf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.sin.f64", "amd_vrd8_sin", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"llvm.sin.f64", "amd_vrd4_sin", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.sin.f64", "amd_vrd2_sin", FIXED(2), false, "_ZGV_LLVM_N2v"},
+
+ {"cosf", "amd_vrs16_cosf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"cosf", "amd_vrs8_cosf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"cosf", "amd_vrs4_cosf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"cos", "amd_vrd8_cos", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"cos", "amd_vrd4_cos", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"cos", "amd_vrd2_cos", FIXED(2), false, "_ZGV_LLVM_N2v"},
+
+ {"llvm.cos.f32", "amd_vrs16_cosf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"llvm.cos.f32", "amd_vrs8_cosf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"llvm.cos.f32", "amd_vrs4_cosf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.cos.f64", "amd_vrd8_cos", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"llvm.cos.f64", "amd_vrd4_cos", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.cos.f64", "amd_vrd2_cos", FIXED(2), false, "_ZGV_LLVM_N2v"},
+
+ {"expf", "amd_vrs16_expf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"expf", "amd_vrs8_expf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"expf", "amd_vrs4_expf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"exp", "amd_vrd2_exp", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"exp", "amd_vrd4_exp", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"exp", "amd_vrd8_exp", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"__expf_finite", "amd_vrs16_expf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"__expf_finite", "amd_vrs8_expf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"__expf_finite", "amd_vrs4_expf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"__exp_finite", "amd_vrd2_exp", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"__exp_finite", "amd_vrd4_exp", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"__exp_finite", "amd_vrd8_exp", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"llvm.exp.f32", "amd_vrs16_expf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"llvm.exp.f32", "amd_vrs8_expf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"llvm.exp.f32", "amd_vrs4_expf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.exp.f64", "amd_vrd2_exp", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"llvm.exp.f64", "amd_vrd4_exp", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.exp.f64", "amd_vrd8_exp", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"exp2f", "amd_vrs16_exp2f", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"exp2f", "amd_vrs8_exp2f", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"exp2f", "amd_vrs4_exp2f", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"exp2", "amd_vrd2_exp2", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"exp2", "amd_vrd4_exp2", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"exp2", "amd_vrd8_exp2", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"__exp2f_finite", "amd_vrs16_exp2f", FIXED(16), false,
+ "_ZGV_LLVM_N16v"},
+ {"__exp2f_finite", "amd_vrs8_exp2f", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"__exp2f_finite", "amd_vrs4_exp2f", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"__exp2_finite", "amd_vrd2_exp2", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"__exp2_finite", "amd_vrd4_exp2", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"__exp2_finite", "amd_vrd8_exp2", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"llvm.exp2.f32", "amd_vrs16_exp2f", FIXED(16), false,
+ "_ZGV_LLVM_N16v"},
+ {"llvm.exp2.f32", "amd_vrs8_exp2f", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"llvm.exp2.f32", "amd_vrs4_exp2f", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.exp2.f64", "amd_vrd2_exp2", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"llvm.exp2.f64", "amd_vrd4_exp2", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.exp2.f64", "amd_vrd8_exp2", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"powf", "amd_vrs16_powf", FIXED(16), false, "_ZGV_LLVM_N16vv"},
+ {"powf", "amd_vrs8_powf", FIXED(8), false, "_ZGV_LLVM_N8vv"},
+ {"powf", "amd_vrs4_powf", FIXED(4), false, "_ZGV_LLVM_N4vv"},
+ {"pow", "amd_vrd2_pow", FIXED(2), false, "_ZGV_LLVM_N2vv"},
+ {"pow", "amd_vrd4_pow", FIXED(4), false, "_ZGV_LLVM_N4vv"},
+ {"pow", "amd_vrd8_pow", FIXED(8), false, "_ZGV_LLVM_N8vv"},
+
+ {"__powf_finite", "amd_vrs16_powf", FIXED(16), false,
+ "_ZGV_LLVM_N16vv"},
+ {"__powf_finite", "amd_vrs8_powf", FIXED(8), false, "_ZGV_LLVM_N8vv"},
+ {"__powf_finite", "amd_vrs4_powf", FIXED(4), false, "_ZGV_LLVM_N4vv"},
+ {"__pow_finite", "amd_vrd2_pow", FIXED(2), false, "_ZGV_LLVM_N2vv"},
+ {"__pow_finite", "amd_vrd4_pow", FIXED(4), false, "_ZGV_LLVM_N4vv"},
+ {"__pow_finite", "amd_vrd8_pow", FIXED(8), false, "_ZGV_LLVM_N8vv"},
+
+ {"llvm.pow.f32", "amd_vrs16_powf", FIXED(16), false, "_ZGV_LLVM_N16vv"},
+ {"llvm.pow.f32", "amd_vrs8_powf", FIXED(8), false, "_ZGV_LLVM_N8vv"},
+ {"llvm.pow.f32", "amd_vrs4_powf", FIXED(4), false, "_ZGV_LLVM_N4vv"},
+ {"llvm.pow.f64", "amd_vrd2_pow", FIXED(2), false, "_ZGV_LLVM_N2vv"},
+ {"llvm.pow.f64", "amd_vrd4_pow", FIXED(4), false, "_ZGV_LLVM_N4vv"},
+ {"llvm.pow.f64", "amd_vrd8_pow", FIXED(8), false, "_ZGV_LLVM_N8vv"},
+
+ {"logf", "amd_vrs16_logf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"logf", "amd_vrs8_logf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"logf", "amd_vrs4_logf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"log", "amd_vrd2_log", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"log", "amd_vrd4_log", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"log", "amd_vrd8_log", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"__logf_finite", "amd_vrs16_logf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"__logf_finite", "amd_vrs8_logf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"__logf_finite", "amd_vrs4_logf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"__log_finite", "amd_vrd2_log", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"__log_finite", "amd_vrd4_log", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"__log_finite", "amd_vrd8_log", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"llvm.log.f32", "amd_vrs16_logf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"llvm.log.f32", "amd_vrs8_logf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"llvm.log.f32", "amd_vrs4_logf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.log.f64", "amd_vrd2_log", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"llvm.log.f64", "amd_vrd4_log", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.log.f64", "amd_vrd8_log", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"log2f", "amd_vrs16_log2f", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"log2f", "amd_vrs8_log2f", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"log2f", "amd_vrs4_log2f", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"log2", "amd_vrd2_log2", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"log2", "amd_vrd4_log2", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"log2", "amd_vrd8_log2", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"__log2f_finite", "amd_vrs16_log2f", FIXED(16), false,
+ "_ZGV_LLVM_N16v"},
+ {"__log2f_finite", "amd_vrs8_log2f", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"__log2f_finite", "amd_vrs4_log2f", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"__log2_finite", "amd_vrd2_log2", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"__log2_finite", "amd_vrd4_log2", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"__log2_finite", "amd_vrd8_log2", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"llvm.log2.f32", "amd_vrs16_log2f", FIXED(16), false,
+ "_ZGV_LLVM_N16v"},
+ {"llvm.log2.f32", "amd_vrs8_log2f", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"llvm.log2.f32", "amd_vrs4_log2f", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.log2.f64", "amd_vrd2_log2", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"llvm.log2.f64", "amd_vrd4_log2", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"llvm.log2.f64", "amd_vrd8_log2", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"log10f", "amd_vrs16_log10f", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"log10f", "amd_vrs8_log10f", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"log10f", "amd_vrs4_log10f", FIXED(4), false, "_ZGV_LLVM_N4v"},
+
+ {"__log10f_finite", "amd_vrs16_log10f", FIXED(16), false,
+ "_ZGV_LLVM_N16v"},
+ {"__log10f_finite", "amd_vrs8_log10f", FIXED(8), false,
+ "_ZGV_LLVM_N8v"},
+ {"__log10f_finite", "amd_vrs4_log10f", FIXED(4), false,
+ "_ZGV_LLVM_N4v"},
+
+ {"llvm.log10.f32", "amd_vrs16_log10f", FIXED(16), false,
+ "_ZGV_LLVM_N16v"},
+ {"llvm.log10.f32", "amd_vrs8_log10f", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"llvm.log10.f32", "amd_vrs4_log10f", FIXED(4), false, "_ZGV_LLVM_N4v"},
+
+ {"erff", "amd_vrs4_erff", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"erff", "amd_vrs8_erff", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"erff", "amd_vrs16_erff", FIXED(16), false, "_ZGV_LLVM_N16v"},
+ {"erf", "amd_vrd2_erf", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"erf", "amd_vrd4_erf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"erf", "amd_vrd8_erf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"exp10", "amd_vrd2_exp10", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"exp10f", "amd_vrs4_exp10f", FIXED(4), false, "_ZGV_LLVM_N4v"},
+
+ {"expm1", "amd_vrd2_expm1", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"expm1f", "amd_vrs4_expm1f", FIXED(4), false, "_ZGV_LLVM_N4v"},
+
+ {"log1p", "amd_vrd2_log1p", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"log1pf", "amd_vrs4_log1pf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+
+ {"tan", "amd_vrd2_tan", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"tan", "amd_vrd4_tan", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"tan", "amd_vrd8_tan", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"tanf", "amd_vrs4_tanf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"tanf", "amd_vrs8_tanf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"tanf", "amd_vrs16_tanf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+
+ {"asin", "amd_vrd8_asin", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"asinf", "amd_vrs4_asinf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"asinf", "amd_vrs8_asinf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"asinf", "amd_vrs16_asinf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+
+ {"acosf", "amd_vrs4_acosf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"acosf", "amd_vrs8_acosf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"atan", "amd_vrd2_atan", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"atan", "amd_vrd4_atan", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"atan", "amd_vrd8_atan", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"atanf", "amd_vrs4_atanf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"atanf", "amd_vrs8_atanf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+ {"atanf", "amd_vrs16_atanf", FIXED(16), false, "_ZGV_LLVM_N16v"},
+
+ {"coshf", "amd_vrs4_coshf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"coshf", "amd_vrs8_coshf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"tanhf", "amd_vrs4_tanhf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ {"tanhf", "amd_vrs8_tanhf", FIXED(8), false, "_ZGV_LLVM_N8v"},
+
+ {"cbrt", "amd_vrd2_cbrt", FIXED(2), false, "_ZGV_LLVM_N2v"},
+ {"cbrtf", "amd_vrs4_cbrtf", FIXED(4), false, "_ZGV_LLVM_N4v"},
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
case NoLibrary:
break;
}
diff --git a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
index 96c5b19a4a5913..2d74a91f62dc07 100644
--- a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
+++ b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
@@ -46,6 +46,10 @@ TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL,
TargetTriple);
break;
+ case VectorLibrary::AMDLIBM:
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::AMDLIBM,
+ TargetTriple);
+ break;
default:
break;
}
diff --git a/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
index df8b7c498bd002..fde6cb788b46f9 100644
--- a/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
+++ b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
; RUN: opt -vector-library=SVML -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,SVML
+; RUN: opt -vector-library=AMDLIBM -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,AMDLIBM
; RUN: opt -vector-library=LIBMVEC-X86 -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86
; RUN: opt -vector-library=MASSV -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV
; RUN: opt -vector-library=Accelerate -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE
@@ -13,6 +14,11 @@ define <4 x double> @exp_v4(<4 x double> %in) {
; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]])
; SVML-NEXT: ret <4 x double> [[TMP1]]
;
+; AMDLIBM-LABEL: define {{[^@]+}}@exp_v4
+; AMDLIBM-SAME: (<4 x double> [[IN:%.*]]) {
+; AMDLIBM-NEXT: [[TMP1:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[IN]])
+; AMDLIBM-NEXT: ret <4 x double> [[TMP1]]
+;
; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4
; LIBMVEC-X86-SAME: (<4 x double> [[IN:%.*]]) {
; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x double> @_ZGVdN4v_exp(<4 x double> [[IN]])
@@ -40,6 +46,11 @@ define <4 x float> @exp_f32(<4 x float> %in) {
; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]])
; SVML-NEXT: ret <4 x float> [[TMP1]]
;
+; AMDLIBM-LABEL: define {{[^@]+}}@exp_f32
+; AMDLIBM-SAME: (<4 x float> [[IN:%.*]]) {
+; AMDLIBM-NEXT: [[TMP1:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[IN]])
+; AMDLIBM-NEXT: ret <4 x float> [[TMP1]]
+;
; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32
; LIBMVEC-X86-SAME: (<4 x float> [[IN:%.*]]) {
; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x float> @_ZGVbN4v_expf(<4 x float> [[IN]])
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll
new file mode 100644
index 00000000000000..54bb9352f3c89c
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll
@@ -0,0 +1,332 @@
+; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
+
+; Test to verify that when math headers are built with
+; __FINITE_MATH_ONLY__ enabled, causing use of __<func>_finite
+; function versions, vectorization can map these to vector versions.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare float @__expf_finite(float) #0
+
+; CHECK-LABEL: @exp_f32
+; CHECK: <4 x float> @amd_vrs4_expf
+; CHECK: ret
+define void @exp_f32(ptr nocapture %varray) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %tmp = trunc i64 %indvars.iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call fast float @__expf_finite(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/78560
More information about the cfe-commits
mailing list