[llvm] 3feb63e - [TLI][AArch64] Add SLEEF mappings to scalable vector functions for fmod and fmodf

Thu Aug 3 07:34:00 PDT 2023

Author: Jolanta Jensen
Date: 2023-08-03T14:33:33Z
New Revision: 3feb63e112e21f293e8264cb25028164fcc2cf9f

URL: https://github.com/llvm/llvm-project/commit/3feb63e112e21f293e8264cb25028164fcc2cf9f
DIFF: https://github.com/llvm/llvm-project/commit/3feb63e112e21f293e8264cb25028164fcc2cf9f.diff

LOG: [TLI][AArch64] Add SLEEF mappings to scalable vector functions for fmod and fmodf

This patch adds SLEEF mappings to scalable vector functions for fmod and fmodf.

Differential Revision: https://reviews.llvm.org/D156920

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/VecFuncs.def
    llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index b884c1e3911e6b..ee137edca342b4 100644

--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -644,6 +644,9 @@ TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED)
 TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10",  SCALABLE(2), MASKED)
 TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED)
 
+TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED)
+
 TLI_DEFINE_VECFUNC("lgamma", "_ZGVsMxv_lgamma",  SCALABLE(2), MASKED)
 TLI_DEFINE_VECFUNC("lgammaf", "_ZGVsMxv_lgammaf", SCALABLE(4), MASKED)
 

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
index d7decd57d0b966..d25e24efd5a238 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
@@ -535,6 +535,55 @@ define void @exp10_f32(float* nocapture %varray) {
   ret void
 }
 
+declare double @fmod(double, double) #0
+declare float @fmodf(float, float) #0
+
+define void @fmod_f64(double* nocapture %varray) {
+  ; CHECK-LABEL: @fmod_f64(
+  ; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
+  ; CHECK:    ret void
+  ;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @fmod(double %conv, double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @fmod_f32(float* nocapture %varray) {
+  ; CHECK-LABEL: @fmod_f32(
+  ; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
+  ; CHECK:    ret void
+  ;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @fmodf(float %conv, float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
 declare double @lgamma(double) #0
 declare float @lgammaf(float) #0
 declare double @llvm.lgamma.f64(double) #0