[llvm] [LIBM][AMDLIBM] - Add new vector call support for fveclib=AMDLIBM (PR #180896)
Rohit Aggarwal via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 11 00:11:18 PST 2026
https://github.com/rohitaggarwal007 created https://github.com/llvm/llvm-project/pull/180896
AMD has its own implementation of vector calls. New vector calls are introduced in the library.
Please refer to https://github.com/amd/aocl-libm-ose
>From ae49a3c4add43301cd89b675973b1725c0d11afe Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Wed, 11 Feb 2026 13:37:52 +0530
Subject: [PATCH] [LIBM][AMDLIBM] - Add new vector call for amdlibm
---
llvm/include/llvm/Analysis/VecFuncs.def | 27 ++++++
.../LoopVectorize/X86/amdlibm-calls-finite.ll | 95 ++++++++++++++++++-
.../LoopVectorize/X86/amdlibm-calls.ll | 70 ++++++++++++--
llvm/test/Transforms/Util/add-TLI-mappings.ll | 9 +-
4 files changed, 189 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index f1c615a98aab5..4811bb86786e7 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -1888,12 +1888,39 @@ TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs16_tanhf", FIXED(16), NOMASK, "_ZGV_
TLI_DEFINE_VECFUNC("cbrt", "amd_vrd2_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("cbrtf", "amd_vrs4_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("sincos", "amd_vrd2_sincos", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
TLI_DEFINE_VECFUNC("sincos", "amd_vrd4_sincos", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl8l8")
TLI_DEFINE_VECFUNC("sincos", "amd_vrd8_sincos", FIXED(8), NOMASK, "_ZGV_LLVM_N8vl8l8")
TLI_DEFINE_VECFUNC("sincosf", "amd_vrs4_sincosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
TLI_DEFINE_VECFUNC("sincosf", "amd_vrs8_sincosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8vl4l4")
TLI_DEFINE_VECFUNC("sincosf", "amd_vrs16_sincosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16vl4l4")
+
+TLI_DEFINE_VECFUNC("asin", "amd_vrd2_asin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("asin", "amd_vrd4_asin", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("llvm.asin.f64", "amd_vrd2_asin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.asin.f64", "amd_vrd4_asin", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("__asin_finite", "amd_vrd2_asin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("__asin_finite", "amd_vrd4_asin", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("acos", "amd_vrd2_acos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("acos", "amd_vrd4_acos", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("llvm.acos.f64", "amd_vrd2_acos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.acos.f64", "amd_vrd4_acos", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("__acos_finite", "amd_vrd2_acos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("__acos_finite", "amd_vrd4_acos", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("erfc", "amd_vrd2_erfc", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("erfc", "amd_vrd4_erfc", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("erfc", "amd_vrd8_erfc", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
+
+TLI_DEFINE_VECFUNC("erfcf", "amd_vrs4_erfcf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("erfcf", "amd_vrs8_erfcf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("erfcf", "amd_vrs16_erfcf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
#else
#error "Must choose which vector library functions are to be defined."
#endif
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll
index 9899eded73808..3d3d681781fb4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll
@@ -390,6 +390,98 @@ for.end: ; preds = %for.body
ret void
}
+; =============================== __asin_finite (VF2, VF4) ========================
+
+define void @asin_vf2_f64_finite(ptr nocapture %varray) {
+; CHECK-LABEL: @asin_vf2_f64_finite(
+; CHECK: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_asin(<2 x double> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @__asin_finite(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+ ret void
+}
+
+define void @asin_vf4_f64_finite(ptr nocapture %varray) {
+; CHECK-LABEL: @asin_vf4_f64_finite(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_asin(<4 x double> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @__asin_finite(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
+
+for.end:
+ ret void
+}
+
+; =============================== __acos_finite (VF2, VF4) ========================
+
+define void @acos_vf2_f64_finite(ptr nocapture %varray) {
+; CHECK-LABEL: @acos_vf2_f64_finite(
+; CHECK: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_acos(<2 x double> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @__acos_finite(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+ ret void
+}
+
+define void @acos_vf4_f64_finite(ptr nocapture %varray) {
+; CHECK-LABEL: @acos_vf4_f64_finite(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_acos(<4 x double> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @__acos_finite(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
+
+for.end:
+ ret void
+}
+
!1 = distinct !{!1, !2, !3}
!2 = !{!"llvm.loop.vectorize.width", i32 2}
!3 = !{!"llvm.loop.vectorize.enable", i1 true}
@@ -418,4 +510,5 @@ declare double @__exp10_finite(double) #0
declare float @__exp10f_finite(float) #0
declare double @__asin_finite(double) #0
declare float @__asinf_finite(float) #0
-declare float @__acosf_finite(float) #0
\ No newline at end of file
+declare float @__acosf_finite(float) #0
+declare double @__acos_finite(double) #0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
index 4ced0372e5da3..62f117ef2f89f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -398,8 +398,8 @@ for.end:
define void @acos_f64(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f64(
-; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
-; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_acos(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_acos(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
@@ -450,8 +450,8 @@ for.end:
define void @acos_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f64_intrinsic(
-; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
-; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_acos(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_acos(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
@@ -502,8 +502,8 @@ for.end:
define void @asin_f64(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f64(
-; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
-; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_asin(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_asin(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
@@ -554,8 +554,8 @@ for.end:
define void @asin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f64_intrinsic(
-; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
-; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_asin(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_asin(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
@@ -1809,6 +1809,57 @@ for.cond.cleanup:
ret void
}
+
+; ======================= erfc ============================
+define void @erfc_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @erfc_f64(
+;
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_erfc(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_erfc(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_erfc(<8 x double> [[TMP4:%.*]])
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @erfc(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+; ======================= erfcf ============================
+define void @erfcf_f32(ptr nocapture %varray) {
+; CHECK-LABEL: @erfcf_f32(
+;
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_erfcf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_erfcf(<8 x float> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_erfcf(<16 x float> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @erfcf(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
attributes #0 = { nounwind readnone }
declare double @exp10(double) #0
@@ -1817,3 +1868,6 @@ declare double @llvm.exp10.f64(double) #0
declare float @llvm.exp10.f32(float) #0
declare void @sincos(double, ptr, ptr)
declare void @sincosf(float, ptr, ptr)
+declare double @erfc(double) #0
+declare float @erfcf(float) #0
+
diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll
index 5459512239b64..ac7ba663f6cf9 100644
--- a/llvm/test/Transforms/Util/add-TLI-mappings.ll
+++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll
@@ -16,15 +16,16 @@
; SVML-SAME: ptr @__svml_log10f4,
; SVML-SAME: ptr @__svml_log10f8,
; SVML-SAME: ptr @__svml_log10f16
-; AMDLIBM-SAME: [11 x ptr] [
+; AMDLIBM-SAME: [12 x ptr] [
; AMDLIBM-SAME: ptr @amd_vrd2_sin,
; AMDLIBM-SAME: ptr @amd_vrd4_sin,
; AMDLIBM-SAME: ptr @amd_vrd8_sin,
+; AMDLIBM-SAME: ptr @amd_vrd2_sincos,
; AMDLIBM-SAME: ptr @amd_vrd4_sincos,
; AMDLIBM-SAME: ptr @amd_vrd8_sincos,
; AMDLIBM-SAME: ptr @amd_vrs4_sincosf,
; AMDLIBM-SAME: ptr @amd_vrs8_sincosf,
-; AMDLIBM-SAME: ptr @amd_vrs16_sincosf
+; AMDLIBM-SAME: ptr @amd_vrs16_sincosf,
; AMDLIBM-SAME: ptr @amd_vrs4_log10f,
; AMDLIBM-SAME: ptr @amd_vrs8_log10f,
; AMDLIBM-SAME: ptr @amd_vrs16_log10f
@@ -191,6 +192,7 @@ declare float @llvm.log10.f32(float) #0
; AMDLIBM: declare <2 x double> @amd_vrd2_sin(<2 x double>)
; AMDLIBM: declare <4 x double> @amd_vrd4_sin(<4 x double>)
; AMDLIBM: declare <8 x double> @amd_vrd8_sin(<8 x double>)
+; AMDLIBM: declare void @amd_vrd2_sincos(<2 x double>, ptr, ptr)
; AMDLIBM: declare void @amd_vrd4_sincos(<4 x double>, ptr, ptr)
; AMDLIBM: declare void @amd_vrd8_sincos(<8 x double>, ptr, ptr)
; AMDLIBM: declare void @amd_vrs4_sincosf(<4 x float>, ptr, ptr)
@@ -263,7 +265,8 @@ attributes #0 = { nounwind readnone }
; AMDLIBM-SAME: _ZGV_LLVM_N4v_sin(amd_vrd4_sin),
; AMDLIBM-SAME: _ZGV_LLVM_N8v_sin(amd_vrd8_sin)" }
; AMDLIBM: attributes #[[SINCOS]] = { "vector-function-abi-variant"=
-; AMDLIBM-SAME: "_ZGV_LLVM_N4vl8l8_sincos(amd_vrd4_sincos),
+; AMDLIBM-SAME: "_ZGV_LLVM_N2vl8l8_sincos(amd_vrd2_sincos),
+; AMDLIBM-SAME: _ZGV_LLVM_N4vl8l8_sincos(amd_vrd4_sincos),
; AMDLIBM-SAME: _ZGV_LLVM_N8vl8l8_sincos(amd_vrd8_sincos)" }
; AMDLIBM: attributes #[[SINCOSF]] = { "vector-function-abi-variant"=
; AMDLIBM-SAME: "_ZGV_LLVM_N4vl4l4_sincosf(amd_vrs4_sincosf),
More information about the llvm-commits
mailing list