[llvm] 92e0ab9 - [AArch64] Don't map llvm sqrt intrinsics to veclib functions

Graham Hunter via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 7 04:17:20 PST 2023


Author: Graham Hunter
Date: 2023-03-07T11:43:41Z
New Revision: 92e0ab937ffd348e723586ac0a980ffeca4dad48

URL: https://github.com/llvm/llvm-project/commit/92e0ab937ffd348e723586ac0a980ffeca4dad48
DIFF: https://github.com/llvm/llvm-project/commit/92e0ab937ffd348e723586ac0a980ffeca4dad48.diff

LOG: [AArch64] Don't map llvm sqrt intrinsics to veclib functions

AArch64 has native sqrt instructions, so for the llvm sqrt intrinsics we
want to use those rather than calls to vector math routines; the
intrinsics do not imply some of the constraints (such as setting errno)
that libm calls might have. Remove the intrinsic-to-veclib mappings
accordingly.
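
For illustration only (editor's sketch, not part of the commit): assuming
-vector-library=sleefgnuabi on an aarch64 target, the intended effect is
roughly the following. The function name @sqrt_intrinsic is made up for
this example.

  ; The llvm.sqrt intrinsic is no longer rewritten to a SLEEF call by
  ; -replace-with-veclib; the AArch64 backend can lower it directly to fsqrt.
  define <2 x double> @sqrt_intrinsic(<2 x double> %x) {
    %r = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
    ret <2 x double> %r
  }
  declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)

  ; The plain libm call keeps its VecFuncs.def entry, so the loop vectorizer
  ; can still widen calls to sqrt() into _ZGVnN2v_sqrt.
  declare double @sqrt(double)

This is the behaviour exercised by the added and updated tests below.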

Code originally written by mgabka

Reviewed By: danielkiss, paulwalker-arm

Differential Revision: https://reviews.llvm.org/D145392

Added: 
    llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sqrt.ll

Modified: 
    llvm/include/llvm/Analysis/VecFuncs.def
    llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 85d208b946252..34f5b3904cb07 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -525,7 +525,6 @@ TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2))
 TLI_DEFINE_VECFUNC( "llvm.sinh.f64", "_ZGVnN2v_sinh", FIXED(2))
 
 TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2))
-TLI_DEFINE_VECFUNC( "llvm.sqrt.f64", "_ZGVnN2v_sqrt", FIXED(2))
 
 TLI_DEFINE_VECFUNC( "tan", "_ZGVnN2v_tan", FIXED(2))
 TLI_DEFINE_VECFUNC( "llvm.tan.f64", "_ZGVnN2v_tan", FIXED(2))
@@ -595,7 +594,6 @@ TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4))
 TLI_DEFINE_VECFUNC( "llvm.sinh.f32", "_ZGVnN4v_sinhf", FIXED(4))
 
 TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4))
-TLI_DEFINE_VECFUNC( "llvm.sqrt.f32", "_ZGVnN4v_sqrtf", FIXED(4))
 
 TLI_DEFINE_VECFUNC( "tanf", "_ZGVnN4v_tanf", FIXED(4))
 TLI_DEFINE_VECFUNC( "llvm.tan.f32", "_ZGVnN4v_tanf", FIXED(4))

diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sqrt.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sqrt.ll
new file mode 100644
index 0000000000000..03fca2ffe3e32
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sqrt.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -vector-library=sleefgnuabi -replace-with-veclib -S < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <2 x double> @llvm_sqrt_f64(<2 x double> %in) {
+; CHECK-LABEL: define {{[^@]+}}@llvm_sqrt_f64
+; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[IN]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_sqrt_f32(<4 x float> %in) {
+; CHECK-LABEL: define {{[^@]+}}@llvm_sqrt_f32
+; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[IN]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_sqrt_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: define {{[^@]+}}@llvm_sqrt_vscale_f64
+; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_sqrt_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: define {{[^@]+}}@llvm_sqrt_vscale_f32
+; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>)

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
index 623ac986fcace..ed9e1b4b6fb26 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
@@ -922,6 +922,53 @@ define void @sqrt_f32(float* nocapture %varray) {
   ret void
 }
 
+	
+define void @llvm_sqrt_f64(double* nocapture %varray) {
+  ; CHECK-LABEL: @llvm_sqrt_f64(
+  ; CHECK:    [[TMP5:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP4:%.*]])
+  ; CHECK:    ret void
+  ;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call fast double @llvm.sqrt.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_sqrt_f32(float* nocapture %varray) {
+  ; CHECK-LABEL: @llvm_sqrt_f32(
+  ; CHECK:    [[TMP5:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP4:%.*]])
+  ; CHECK:    ret void
+  ;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call fast float @llvm.sqrt.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
 declare double @tan(double) #0
 declare float @tanf(float) #0
 declare double @llvm.tan.f64(double) #0
