[flang-commits] [flang] a03e93e - [flang] Add runtime support for Fortran intrinsic ERFC_SCALED (#95040)
via flang-commits
flang-commits at lists.llvm.org
Tue Jun 11 09:40:04 PDT 2024
Author: David Parks
Date: 2024-06-11T09:40:00-07:00
New Revision: a03e93e1b2172791085f3f8c293b8e5d6ed8d841
URL: https://github.com/llvm/llvm-project/commit/a03e93e1b2172791085f3f8c293b8e5d6ed8d841
DIFF: https://github.com/llvm/llvm-project/commit/a03e93e1b2172791085f3f8c293b8e5d6ed8d841.diff
LOG: [flang] Add runtime support for Fortran intrinsic ERFC_SCALED (#95040)
Co-authored-by: David Parks <dparks at nvidia.com>
Added:
flang/test/Lower/Intrinsics/erfc_scaled.f90
Modified:
flang/include/flang/Optimizer/Builder/IntrinsicCall.h
flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
flang/include/flang/Runtime/numeric.h
flang/lib/Optimizer/Builder/IntrinsicCall.cpp
flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
flang/runtime/numeric-templates.h
flang/runtime/numeric.cpp
flang/unittests/Runtime/Numeric.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 52f2034b8707a..ec1fb411ff0e2 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -204,6 +204,8 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
+ mlir::Value genErfcScaled(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args);
void genCFPointer(llvm::ArrayRef<fir::ExtendedValue>);
void genCFProcPointer(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCFunLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h b/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
index 558358257b513..6857650ce52b7 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Numeric.h
@@ -18,6 +18,10 @@ class FirOpBuilder;
namespace fir::runtime {
+/// Generate call to ErfcScaled intrinsic runtime routine.
+mlir::Value genErfcScaled(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value x);
+
/// Generate call to Exponent intrinsic runtime routine.
mlir::Value genExponent(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type resultType, mlir::Value x);
diff --git a/flang/include/flang/Runtime/numeric.h b/flang/include/flang/Runtime/numeric.h
index 7d3f91360c8cf..e051e86431663 100644
--- a/flang/include/flang/Runtime/numeric.h
+++ b/flang/include/flang/Runtime/numeric.h
@@ -73,6 +73,20 @@ CppTypeFor<TypeCategory::Integer, 16> RTDECL(Ceiling16_16)(
#endif
#endif
+// ERFC_SCALED
+CppTypeFor<TypeCategory::Real, 4> RTDECL(ErfcScaled4)(
+ CppTypeFor<TypeCategory::Real, 4>);
+CppTypeFor<TypeCategory::Real, 8> RTDECL(ErfcScaled8)(
+ CppTypeFor<TypeCategory::Real, 8>);
+#if LDBL_MANT_DIG == 64
+CppTypeFor<TypeCategory::Real, 10> RTDECL(ErfcScaled10)(
+ CppTypeFor<TypeCategory::Real, 10>);
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+CppTypeFor<TypeCategory::Real, 16> RTDECL(ErfcScaled16)(
+ CppTypeFor<TypeCategory::Real, 16>);
+#endif
+
// EXPONENT is defined to return default INTEGER; support INTEGER(4 & 8)
CppTypeFor<TypeCategory::Integer, 4> RTDECL(Exponent4_4)(
CppTypeFor<TypeCategory::Real, 4>);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 4cdf1f2d98caa..4317806561693 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -224,6 +224,7 @@ static constexpr IntrinsicHandler handlers[]{
{"boundary", asBox, handleDynamicOptional},
{"dim", asValue}}},
/*isElemental=*/false},
+ {"erfc_scaled", &I::genErfcScaled},
{"etime",
&I::genEtime,
{{{"values", asBox}, {"time", asBox}}},
@@ -5878,6 +5879,16 @@ mlir::Value IntrinsicLibrary::genRRSpacing(mlir::Type resultType,
fir::runtime::genRRSpacing(builder, loc, fir::getBase(args[0])));
}
+// ERFC_SCALED
+mlir::Value IntrinsicLibrary::genErfcScaled(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 1);
+
+ return builder.createConvert(
+ loc, resultType,
+ fir::runtime::genErfcScaled(builder, loc, fir::getBase(args[0])));
+}
+
// SAME_TYPE_AS
fir::ExtendedValue
IntrinsicLibrary::genSameTypeAs(mlir::Type resultType,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
index 8ac9d64f576b6..1d13248db5984 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
@@ -22,6 +22,28 @@ using namespace Fortran::runtime;
// may not have them in their runtime library. This can occur in the
// case of cross compilation, for example.
+/// Placeholder for real*10 version of ErfcScaled Intrinsic
+struct ForcedErfcScaled10 {
+ static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ErfcScaled10));
+ static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+ return [](mlir::MLIRContext *ctx) {
+ auto ty = mlir::FloatType::getF80(ctx);
+ return mlir::FunctionType::get(ctx, {ty}, {ty});
+ };
+ }
+};
+
+/// Placeholder for real*16 version of ErfcScaled Intrinsic
+struct ForcedErfcScaled16 {
+ static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ErfcScaled16));
+ static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+ return [](mlir::MLIRContext *ctx) {
+ auto ty = mlir::FloatType::getF128(ctx);
+ return mlir::FunctionType::get(ctx, {ty}, {ty});
+ };
+ }
+};
+
/// Placeholder for real*10 version of Exponent Intrinsic
struct ForcedExponent10_4 {
static constexpr const char *name = ExpandAndQuoteKey(RTNAME(Exponent10_4));
@@ -444,6 +466,30 @@ mlir::Value fir::runtime::genRRSpacing(fir::FirOpBuilder &builder,
return builder.create<fir::CallOp>(loc, func, args).getResult(0);
}
+/// Generate call to ErfcScaled intrinsic runtime routine.
+mlir::Value fir::runtime::genErfcScaled(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value x) {
+ mlir::func::FuncOp func;
+ mlir::Type fltTy = x.getType();
+
+ if (fltTy.isF32())
+ func = fir::runtime::getRuntimeFunc<mkRTKey(ErfcScaled4)>(loc, builder);
+ else if (fltTy.isF64())
+ func = fir::runtime::getRuntimeFunc<mkRTKey(ErfcScaled8)>(loc, builder);
+ else if (fltTy.isF80())
+ func = fir::runtime::getRuntimeFunc<ForcedErfcScaled10>(loc, builder);
+ else if (fltTy.isF128())
+ func = fir::runtime::getRuntimeFunc<ForcedErfcScaled16>(loc, builder);
+ else
+ fir::intrinsicTypeTODO(builder, fltTy, loc, "ERFC_SCALED");
+
+ auto funcTy = func.getFunctionType();
+ llvm::SmallVector<mlir::Value> args = {
+ builder.createConvert(loc, funcTy.getInput(0), x)};
+
+ return builder.create<fir::CallOp>(loc, func, args).getResult(0);
+}
+
/// Generate call to Scale intrinsic runtime routine.
mlir::Value fir::runtime::genScale(fir::FirOpBuilder &builder,
mlir::Location loc, mlir::Value x,
diff --git a/flang/runtime/numeric-templates.h b/flang/runtime/numeric-templates.h
index 4936e7738a663..1b5395df94519 100644
--- a/flang/runtime/numeric-templates.h
+++ b/flang/runtime/numeric-templates.h
@@ -354,6 +354,110 @@ template <int PREC, typename T> inline RT_API_ATTRS T Spacing(T x) {
}
}
+// ERFC_SCALED (16.9.71)
+template <typename T> inline RT_API_ATTRS T ErfcScaled(T arg) {
+ // Coefficients for approximation to erfc in the first interval.
+ static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02,
+ 3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1};
+ static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02,
+ 1.28261652607737228e03, 2.84423683343917062e03};
+
+ // Coefficients for approximation to erfc in the second interval.
+ static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00,
+ 6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02,
+ 1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03,
+ 2.15311535474403846e-8};
+ static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02,
+ 5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03,
+ 4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03};
+
+ // Coefficients for approximation to erfc in the third interval.
+ static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1,
+ 1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4,
+ 1.63153871373020978e-2};
+ static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00,
+ 5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3};
+
+ constexpr T sqrtpi{1.7724538509078120380404576221783883301349L};
+ constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L};
+ constexpr T epsilonby2{std::numeric_limits<T>::epsilon() * 0.5};
+ constexpr T xneg{-26.628e0};
+ constexpr T xhuge{6.71e7};
+ constexpr T thresh{0.46875e0};
+ constexpr T zero{0.0};
+ constexpr T one{1.0};
+ constexpr T four{4.0};
+ constexpr T sixteen{16.0};
+ constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits<T>::min())};
+ static_assert(xmax > xhuge, "xmax must be greater than xhuge");
+
+ T ysq;
+ T xnum;
+ T xden;
+ T del;
+ T result;
+
+ auto x{arg};
+ auto y{std::fabs(x)};
+
+ if (y <= thresh) {
+ // evaluate erf for |x| <= 0.46875
+ ysq = zero;
+ if (y > epsilonby2) {
+ ysq = y * y;
+ }
+ xnum = a[4] * ysq;
+ xden = ysq;
+ for (int i{0}; i < 3; i++) {
+ xnum = (xnum + a[i]) * ysq;
+ xden = (xden + b[i]) * ysq;
+ }
+ result = x * (xnum + a[3]) / (xden + b[3]);
+ result = one - result;
+ result = std::exp(ysq) * result;
+ return result;
+ } else if (y <= four) {
+ // evaluate erfc for 0.46875 < |x| <= 4.0
+ xnum = c[8] * y;
+ xden = y;
+ for (int i{0}; i < 7; ++i) {
+ xnum = (xnum + c[i]) * y;
+ xden = (xden + d[i]) * y;
+ }
+ result = (xnum + c[7]) / (xden + d[7]);
+ } else {
+ // evaluate erfc for |x| > 4.0
+ result = zero;
+ if (y >= xhuge) {
+ if (y < xmax) {
+ result = rsqrtpi / y;
+ }
+ } else {
+ ysq = one / (y * y);
+ xnum = p[5] * ysq;
+ xden = ysq;
+ for (int i{0}; i < 4; ++i) {
+ xnum = (xnum + p[i]) * ysq;
+ xden = (xden + q[i]) * ysq;
+ }
+ result = ysq * (xnum + p[4]) / (xden + q[4]);
+ result = (rsqrtpi - result) / y;
+ }
+ }
+ // fix up for negative argument, erf, etc.
+ if (x < zero) {
+ if (x < xneg) {
+ result = std::numeric_limits<T>::max();
+ } else {
+ ysq = trunc(x * sixteen) / sixteen;
+ del = (x - ysq) * (x + ysq);
+ y = std::exp((ysq * ysq)) * std::exp((del));
+ result = (y + y) - result;
+ }
+ }
+ return result;
+}
+
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_NUMERIC_TEMPLATES_H_
diff --git a/flang/runtime/numeric.cpp b/flang/runtime/numeric.cpp
index 2225473c4690e..7c40beb31083f 100644
--- a/flang/runtime/numeric.cpp
+++ b/flang/runtime/numeric.cpp
@@ -316,6 +316,27 @@ CppTypeFor<TypeCategory::Integer, 16> RTDEF(Ceiling16_16)(
#endif
#endif
+CppTypeFor<TypeCategory::Real, 4> RTDEF(ErfcScaled4)(
+ CppTypeFor<TypeCategory::Real, 4> x) {
+ return ErfcScaled(x);
+}
+CppTypeFor<TypeCategory::Real, 8> RTDEF(ErfcScaled8)(
+ CppTypeFor<TypeCategory::Real, 8> x) {
+ return ErfcScaled(x);
+}
+#if LDBL_MANT_DIG == 64
+CppTypeFor<TypeCategory::Real, 10> RTDEF(ErfcScaled10)(
+ CppTypeFor<TypeCategory::Real, 10> x) {
+ return ErfcScaled(x);
+}
+#endif
+#if LDBL_MANT_DIG == 113
+CppTypeFor<TypeCategory::Real, 16> RTDEF(ErfcScaled16)(
+ CppTypeFor<TypeCategory::Real, 16> x) {
+ return ErfcScaled(x);
+}
+#endif
+
CppTypeFor<TypeCategory::Integer, 4> RTDEF(Exponent4_4)(
CppTypeFor<TypeCategory::Real, 4> x) {
return Exponent<CppTypeFor<TypeCategory::Integer, 4>>(x);
diff --git a/flang/test/Lower/Intrinsics/erfc_scaled.f90 b/flang/test/Lower/Intrinsics/erfc_scaled.f90
new file mode 100644
index 0000000000000..ab5e90cb2409e
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/erfc_scaled.f90
@@ -0,0 +1,23 @@
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPerfc_scaled4(
+! CHECK-SAME: %[[x:[^:]+]]: !fir.ref<f32>{{.*}}) -> f32
+function erfc_scaled4(x)
+ real(kind=4) :: erfc_scaled4
+ real(kind=4) :: x
+ erfc_scaled4 = erfc_scaled(x);
+! CHECK: %[[a1:.*]] = fir.load %[[x]] : !fir.ref<f32>
+! CHECK: %{{.*}} = fir.call @_FortranAErfcScaled4(%[[a1]]) {{.*}}: (f32) -> f32
+end function erfc_scaled4
+
+
+! CHECK-LABEL: func @_QPerfc_scaled8(
+! CHECK-SAME: %[[x:[^:]+]]: !fir.ref<f64>{{.*}}) -> f64
+function erfc_scaled8(x)
+ real(kind=8) :: erfc_scaled8
+ real(kind=8) :: x
+ erfc_scaled8 = erfc_scaled(x);
+! CHECK: %[[a1:.*]] = fir.load %[[x]] : !fir.ref<f64>
+! CHECK: %{{.*}} = fir.call @_FortranAErfcScaled8(%[[a1]]) {{.*}}: (f64) -> f64
+end function erfc_scaled8
diff --git a/flang/unittests/Runtime/Numeric.cpp b/flang/unittests/Runtime/Numeric.cpp
index b69ff21ea79fb..9f77e16570783 100644
--- a/flang/unittests/Runtime/Numeric.cpp
+++ b/flang/unittests/Runtime/Numeric.cpp
@@ -31,6 +31,14 @@ TEST(Numeric, Floor) {
EXPECT_EQ(RTNAME(Floor4_1)(Real<4>{0}), 0);
}
+TEST(Numeric, Erfc_scaled) {
+ EXPECT_NEAR(RTNAME(ErfcScaled4)(Real<4>{20.0}), 0.02817434874, 1.0e-8);
+ EXPECT_NEAR(RTNAME(ErfcScaled8)(Real<8>{20.0}), 0.02817434874, 1.0e-11);
+#if LDBL_MANT_DIG == 64
+ EXPECT_NEAR(RTNAME(ErfcScaled10)(Real<10>{20.0}), 0.02817434874, 1.0e-8);
+#endif
+}
+
TEST(Numeric, Exponent) {
EXPECT_EQ(RTNAME(Exponent4_4)(Real<4>{0}), 0);
EXPECT_EQ(RTNAME(Exponent4_8)(Real<4>{1.0}), 1);
More information about the flang-commits
mailing list