[llvm] 89e8a8b - Revert SVML support for sqrt

Mon Oct 5 08:18:51 PDT 2020

Author: Wenlei He
Date: 2020-10-05T08:13:11-07:00
New Revision: 89e8a8b223b2e20bb63e930ddb78cb80a3ed45a2

URL: https://github.com/llvm/llvm-project/commit/89e8a8b223b2e20bb63e930ddb78cb80a3ed45a2
DIFF: https://github.com/llvm/llvm-project/commit/89e8a8b223b2e20bb63e930ddb78cb80a3ed45a2.diff

LOG: Revert SVML support for sqrt

As @craig.topper pointed out in D87169, we shouldn't map llvm.sqrt to SVML, since there is a faster native instruction.
https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_p&expand=5824,5823,5356,5823,5825,5365,5356

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D88620

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/VecFuncs.def
    llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index a47ee3c147252..d3c8973b15ad2 100644

--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -309,14 +309,6 @@ TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4)
 TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8)
 TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16)
 
-TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt2", 2)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt4", 4)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt8", 8)
-
-TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf4", 4)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf8", 8)
-TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf16", 16)
-
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
index da6b4696ba2ba..42c280df6ad02 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
@@ -40,8 +40,6 @@ declare float @llvm.log10.f32(float) #0
 
 declare double @sqrt(double) #0
 declare float @sqrtf(float) #0
-declare double @llvm.sqrt.f64(double) #0
-declare float @llvm.sqrt.f32(float) #0
 
 declare double @exp2(double) #0
 declare float @exp2f(float) #0
@@ -746,52 +744,6 @@ for.end:
   ret void
 }
 
-define void @sqrt_f64_intrinsic(double* nocapture %varray) {
-; CHECK-LABEL: @sqrt_f64_intrinsic(
-; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]])
-; CHECK:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %tmp = trunc i64 %iv to i32
-  %conv = sitofp i32 %tmp to double
-  %call = tail call double @llvm.sqrt.f64(double %conv)
-  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
-  store double %call, double* %arrayidx, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-define void @sqrt_f32_intrinsic(float* nocapture %varray) {
-; CHECK-LABEL: @sqrt_f32_intrinsic(
-; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]])
-; CHECK:    ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %tmp = trunc i64 %iv to i32
-  %conv = sitofp i32 %tmp to float
-  %call = tail call float @llvm.sqrt.f32(float %conv)
-  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %call, float* %arrayidx, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
 define void @exp2_f64(double* nocapture %varray) {
 ; CHECK-LABEL: @exp2_f64(
 ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])