[flang-commits] [flang] [flang] Fold ERFC_SCALED (PR #112287)
via flang-commits
flang-commits at lists.llvm.org
Mon Oct 14 16:54:58 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-runtime
Author: Peter Klausler (klausler)
<details>
<summary>Changes</summary>
Move the ErfcScaled template function from the runtime into a new header file in flang/include/Common, then use it in constant folding to implement folding for the erfc_scaled() intrinsic function.
---
Full diff: https://github.com/llvm/llvm-project/pull/112287.diff
4 Files Affected:
- (added) flang/include/flang/Common/erfc-scaled.h (+116)
- (modified) flang/lib/Evaluate/intrinsics-library.cpp (+2)
- (modified) flang/runtime/numeric-templates.h (+2-100)
- (added) flang/test/Evaluate/fold-erfc-scaled.f90 (+7)
``````````diff
diff --git a/flang/include/flang/Common/erfc-scaled.h b/flang/include/flang/Common/erfc-scaled.h
new file mode 100644
index 00000000000000..a1bf3ea0f09251
--- /dev/null
+++ b/flang/include/flang/Common/erfc-scaled.h
@@ -0,0 +1,116 @@
+//===-- include/flang/Common/erfc-scaled.h-----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_COMMON_ERFC_SCALED_H_
+#define FORTRAN_COMMON_ERFC_SCALED_H_
+
+namespace Fortran::common {
+template <typename T> inline T ErfcScaled(T arg) {
+ // Coefficients for approximation to erfc in the first interval.
+ static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02,
+ 3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1};
+ static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02,
+ 1.28261652607737228e03, 2.84423683343917062e03};
+
+ // Coefficients for approximation to erfc in the second interval.
+ static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00,
+ 6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02,
+ 1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03,
+ 2.15311535474403846e-8};
+ static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02,
+ 5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03,
+ 4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03};
+
+ // Coefficients for approximation to erfc in the third interval.
+ static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1,
+ 1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4,
+ 1.63153871373020978e-2};
+ static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00,
+ 5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3};
+
+ constexpr T sqrtpi{1.7724538509078120380404576221783883301349L};
+ constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L};
+ constexpr T epsilonby2{std::numeric_limits<T>::epsilon() * 0.5};
+ constexpr T xneg{-26.628e0};
+ constexpr T xhuge{6.71e7};
+ constexpr T thresh{0.46875e0};
+ constexpr T zero{0.0};
+ constexpr T one{1.0};
+ constexpr T four{4.0};
+ constexpr T sixteen{16.0};
+ constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits<T>::min())};
+ static_assert(xmax > xhuge, "xmax must be greater than xhuge");
+
+ T ysq;
+ T xnum;
+ T xden;
+ T del;
+ T result;
+
+ auto x{arg};
+ auto y{std::fabs(x)};
+
+ if (y <= thresh) {
+ // evaluate erf for |x| <= 0.46875
+ ysq = zero;
+ if (y > epsilonby2) {
+ ysq = y * y;
+ }
+ xnum = a[4] * ysq;
+ xden = ysq;
+ for (int i{0}; i < 3; i++) {
+ xnum = (xnum + a[i]) * ysq;
+ xden = (xden + b[i]) * ysq;
+ }
+ result = x * (xnum + a[3]) / (xden + b[3]);
+ result = one - result;
+ result = std::exp(ysq) * result;
+ return result;
+ } else if (y <= four) {
+ // evaluate erfc for 0.46875 < |x| <= 4.0
+ xnum = c[8] * y;
+ xden = y;
+ for (int i{0}; i < 7; ++i) {
+ xnum = (xnum + c[i]) * y;
+ xden = (xden + d[i]) * y;
+ }
+ result = (xnum + c[7]) / (xden + d[7]);
+ } else {
+ // evaluate erfc for |x| > 4.0
+ result = zero;
+ if (y >= xhuge) {
+ if (y < xmax) {
+ result = rsqrtpi / y;
+ }
+ } else {
+ ysq = one / (y * y);
+ xnum = p[5] * ysq;
+ xden = ysq;
+ for (int i{0}; i < 4; ++i) {
+ xnum = (xnum + p[i]) * ysq;
+ xden = (xden + q[i]) * ysq;
+ }
+ result = ysq * (xnum + p[4]) / (xden + q[4]);
+ result = (rsqrtpi - result) / y;
+ }
+ }
+ // fix up for negative argument, erf, etc.
+ if (x < zero) {
+ if (x < xneg) {
+ result = std::numeric_limits<T>::max();
+ } else {
+ ysq = trunc(x * sixteen) / sixteen;
+ del = (x - ysq) * (x + ysq);
+ y = std::exp((ysq * ysq)) * std::exp((del));
+ result = (y + y) - result;
+ }
+ }
+ return result;
+}
+} // namespace Fortran::common
+#endif // FORTRAN_COMMON_ERFC_SCALED_H_
diff --git a/flang/lib/Evaluate/intrinsics-library.cpp b/flang/lib/Evaluate/intrinsics-library.cpp
index ce9dd6b7b3df86..ee4df2dbd11327 100644
--- a/flang/lib/Evaluate/intrinsics-library.cpp
+++ b/flang/lib/Evaluate/intrinsics-library.cpp
@@ -14,6 +14,7 @@
#include "flang/Evaluate/intrinsics-library.h"
#include "fold-implementation.h"
#include "host.h"
+#include "flang/Common/erfc-scaled.h"
#include "flang/Common/static-multimap-view.h"
#include "flang/Evaluate/expression.h"
#include <cfloat>
@@ -231,6 +232,7 @@ struct HostRuntimeLibrary<HostT, LibraryVersion::Libm> {
FolderFactory<F, F{std::cosh}>::Create("cosh"),
FolderFactory<F, F{std::erf}>::Create("erf"),
FolderFactory<F, F{std::erfc}>::Create("erfc"),
+ FolderFactory<F, F{common::ErfcScaled}>::Create("erfc_scaled"),
FolderFactory<F, F{std::exp}>::Create("exp"),
FolderFactory<F, F{std::tgamma}>::Create("gamma"),
FolderFactory<F, F{std::log}>::Create("log"),
diff --git a/flang/runtime/numeric-templates.h b/flang/runtime/numeric-templates.h
index 0b00bbb94ddd21..fbb371bffc27a4 100644
--- a/flang/runtime/numeric-templates.h
+++ b/flang/runtime/numeric-templates.h
@@ -21,6 +21,7 @@
#include "terminator.h"
#include "tools.h"
#include "flang/Common/api-attrs.h"
+#include "flang/Common/erfc-scaled.h"
#include "flang/Common/float128.h"
#include <cstdint>
#include <limits>
@@ -362,106 +363,7 @@ template <int PREC, typename T> inline RT_API_ATTRS T Spacing(T x) {
// ERFC_SCALED (16.9.71)
template <typename T> inline RT_API_ATTRS T ErfcScaled(T arg) {
- // Coefficients for approximation to erfc in the first interval.
- static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02,
- 3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1};
- static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02,
- 1.28261652607737228e03, 2.84423683343917062e03};
-
- // Coefficients for approximation to erfc in the second interval.
- static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00,
- 6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02,
- 1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03,
- 2.15311535474403846e-8};
- static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02,
- 5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03,
- 4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03};
-
- // Coefficients for approximation to erfc in the third interval.
- static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1,
- 1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4,
- 1.63153871373020978e-2};
- static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00,
- 5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3};
-
- constexpr T sqrtpi{1.7724538509078120380404576221783883301349L};
- constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L};
- constexpr T epsilonby2{std::numeric_limits<T>::epsilon() * 0.5};
- constexpr T xneg{-26.628e0};
- constexpr T xhuge{6.71e7};
- constexpr T thresh{0.46875e0};
- constexpr T zero{0.0};
- constexpr T one{1.0};
- constexpr T four{4.0};
- constexpr T sixteen{16.0};
- constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits<T>::min())};
- static_assert(xmax > xhuge, "xmax must be greater than xhuge");
-
- T ysq;
- T xnum;
- T xden;
- T del;
- T result;
-
- auto x{arg};
- auto y{std::fabs(x)};
-
- if (y <= thresh) {
- // evaluate erf for |x| <= 0.46875
- ysq = zero;
- if (y > epsilonby2) {
- ysq = y * y;
- }
- xnum = a[4] * ysq;
- xden = ysq;
- for (int i{0}; i < 3; i++) {
- xnum = (xnum + a[i]) * ysq;
- xden = (xden + b[i]) * ysq;
- }
- result = x * (xnum + a[3]) / (xden + b[3]);
- result = one - result;
- result = std::exp(ysq) * result;
- return result;
- } else if (y <= four) {
- // evaluate erfc for 0.46875 < |x| <= 4.0
- xnum = c[8] * y;
- xden = y;
- for (int i{0}; i < 7; ++i) {
- xnum = (xnum + c[i]) * y;
- xden = (xden + d[i]) * y;
- }
- result = (xnum + c[7]) / (xden + d[7]);
- } else {
- // evaluate erfc for |x| > 4.0
- result = zero;
- if (y >= xhuge) {
- if (y < xmax) {
- result = rsqrtpi / y;
- }
- } else {
- ysq = one / (y * y);
- xnum = p[5] * ysq;
- xden = ysq;
- for (int i{0}; i < 4; ++i) {
- xnum = (xnum + p[i]) * ysq;
- xden = (xden + q[i]) * ysq;
- }
- result = ysq * (xnum + p[4]) / (xden + q[4]);
- result = (rsqrtpi - result) / y;
- }
- }
- // fix up for negative argument, erf, etc.
- if (x < zero) {
- if (x < xneg) {
- result = std::numeric_limits<T>::max();
- } else {
- ysq = trunc(x * sixteen) / sixteen;
- del = (x - ysq) * (x + ysq);
- y = std::exp((ysq * ysq)) * std::exp((del));
- result = (y + y) - result;
- }
- }
- return result;
+ return common::ErfcScaled(arg);
}
} // namespace Fortran::runtime
diff --git a/flang/test/Evaluate/fold-erfc-scaled.f90 b/flang/test/Evaluate/fold-erfc-scaled.f90
new file mode 100644
index 00000000000000..b38cd0157d0bad
--- /dev/null
+++ b/flang/test/Evaluate/fold-erfc-scaled.f90
@@ -0,0 +1,7 @@
+! RUN: %python %S/test_folding.py %s %flang_fc1
+module m
+ real(4), parameter :: x20_4 = erfc_scaled(20._4)
+ logical, parameter :: t20_4 = x20_4 == 0.02817435003817081451416015625_4
+ real(8), parameter :: x20_8 = erfc_scaled(20._8)
+ logical, parameter :: t20_8 = x20_8 == 0.0281743487410513193669459042212110944092273712158203125_8
+end
``````````
</details>
https://github.com/llvm/llvm-project/pull/112287
More information about the flang-commits
mailing list