[libc-commits] [libc] [llvm] [libc][math] Implement single-precision erfcf (PR #190061)

via libc-commits libc-commits at lists.llvm.org
Fri Apr 3 13:11:18 PDT 2026


================
@@ -0,0 +1,454 @@
+//===-- Implementation header for erfcf -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ERFCF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ERFCF_H
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/math/exp.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE float erfcf(float x) {
+  // Polynomials approximating erfc(|x|) on ( k/8, (k+1)/8 ) generated by
+  // Sollya with: > P = fpminimax(erfc(x), 10, [|D...|], [k/8, (k+1)/8]);
+  // for k = 0..31.
+  constexpr double COEFFS[32][11] = {
+      {0x1p0, -0x1.20dd750429b6dp0, 0x1.7112720c8ba45p-48, 0x1.812746b031144p-2,
+       0x1.222514285ab0dp-33, -0x1.ce2f2335e679fp-4, 0x1.4b61403cf653ep-23,
+       0x1.b82263f5c7f38p-6, 0x1.ac9c9b798c1efp-16, -0x1.60d7a2758fd51p-8,
+       0x1.26e595feee3cdp-11},
+      {0x1.b82879728f11ep-1, -0x1.1c62fa1e86a2p0, 0x1.1c62fa1eaa08dp-3,
+       0x1.6f552db750199p-2, -0x1.196c992766a4ep-4, -0x1.aabac2bf59268p-4,
+       0x1.73676ae99c457p-6, 0x1.88239613f7488p-6, -0x1.557641bb56776p-8,
+       -0x1.86cd9135cc86dp-8, 0x1.d2dfe9a011161p-9},
+      {0x1.728558ee694fcp-1, -0x1.0f5d1602f7e93p0, 0x1.0f5d160305a8p-2,
+       0x1.3c97445490146p-2, -0x1.040e88feae5b7p-3, -0x1.47e61aa7d843fp-4,
+       0x1.4c14b33c12167p-5, 0x1.0813624bf8053p-6, -0x1.33dc8eb36a748p-7,
+       -0x1.e3508ce7c1243p-9, 0x1.d73a7a0da8387p-9},
+      {0x1.311796a46f064p-1, -0x1.f5f0cdaf153e4p-1, 0x1.78749a436137fp-2,
+       0x1.e106c5128ef88p-3, -0x1.5529aa072e883p-3, -0x1.74896ff63fd4cp-5,
+       0x1.9a84e098f157ep-5, 0x1.620ba3a3d5d5ep-8, -0x1.650155e4f6006p-7,
+       -0x1.633c365eb4f97p-10, 0x1.f20567b3e72e5p-9},
+      {0x1.eb02147ce245cp-2, -0x1.c1efca49a5073p-1, 0x1.c1efca49ad0d4p-2,
+       0x1.2bf531818ea5ap-3, -0x1.76f27d186f571p-3, -0x1.dff14d5720b5cp-8,
+       0x1.99f6682bf8fd4p-5, -0x1.63daa9ab40909p-8, -0x1.445cf3f9f807ap-7,
+       0x1.badcb770290f2p-10, 0x1.1174fb01e6698p-9},
+      {0x1.81cd2465e1d96p-2, -0x1.86e9694134b12p-1, 0x1.e8a3c391764d7p-2,
+       0x1.c810503dd433cp-5, -0x1.6963c9d6f4fcbp-3, 0x1.c12528c8239bap-6,
+       0x1.52aa57c68f9c8p-5, -0x1.c67c4b6a1fd9p-7, -0x1.c75138cb5d6cfp-8,
+       0x1.2e514c579a5e5p-8, -0x1.07fe13496794dp-10},
+      {0x1.27c6d14c5e341p-2, -0x1.492e42d78d298p-1, 0x1.edc5644350061p-2,
+       -0x1.b6e8590cf34d1p-6, -0x1.349b5f0ed8b05p-3, 0x1.b42a42369930cp-5,
+       0x1.b842069dfb4e1p-6, -0x1.2dd078b748381p-6, -0x1.3a14a6bd993e6p-9,
+       0x1.1d0541da7a108p-8, -0x1.024d4d28887c2p-10},
+      {0x1.ba36dab91c0e9p-3, -0x1.0cab61f084b7dp-1, 0x1.d62beb64e66cdp-2,
+       -0x1.7c9d7567cde58p-4, -0x1.cc6056e010c19p-4, 0x1.1350fedc7c8d4p-4,
+       0x1.53b5fa5002e8dp-7, -0x1.308f9a3a15148p-6, 0x1.d7d8ba40072f3p-10,
+       0x1.c38fae1003e4ap-9, -0x1.5731dcef3635ap-10},
+      {0x1.4226162fbddd5p-3, -0x1.a911f096fbc44p-2, 0x1.a911f096fd02p-2,
+       -0x1.1b614b101514ep-3, -0x1.1b614acdf645cp-4, 0x1.1b61447098344p-4,
+       -0x1.2e3ee72cc54cfp-8, -0x1.f0b9c4004e99ep-7, 0x1.3ac97256e2bffp-8,
+       0x1.d602e98b4b3f8p-10, -0x1.1fe3083e3949dp-10},
+      {0x1.c9296beb09cf1p-4, -0x1.45e99bcbb792dp-2, 0x1.6ea6cf452f804p-2,
+       -0x1.4cb3cf0b3d596p-3, -0x1.ca50823eddc83p-6, 0x1.f65d0aa94b23cp-5,
+       -0x1.fd196a6987475p-7, -0x1.3aed937418987p-7, 0x1.8d13b74d7478p-8,
+       0x1.fea855dfc40abp-13, -0x1.d5b41ac1c75p-11},
+      {0x1.3bcd133aa0ffcp-4, -0x1.e4652fadcb6bap-3, 0x1.2ebf3dcc9f547p-2,
+       -0x1.571d01c5e8c08p-3, 0x1.93a9a89cbd372p-8, 0x1.8281ca9a40a1p-5,
+       -0x1.5cff78e5e2ef3p-6, -0x1.db7d47bbfd40cp-9, 0x1.76601dde9691cp-8,
+       -0x1.f67319b06ddedp-11, -0x1.1d6660796d9dfp-11},
+      {0x1.a8973c4b5c03ep-5, -0x1.5ce595c455acbp-3, 0x1.dfbbadedf34bap-3,
+       -0x1.4374d82d467fep-3, 0x1.f3b8d4383b809p-6, 0x1.f572dc4041a58p-6,
+       -0x1.6b1856b171392p-6, 0x1.74c906da034dcp-10, 0x1.164472ab539c6p-8,
+       -0x1.96e8164e48141p-10, -0x1.f23662919ef35p-13},
+      {0x1.15aaa8ec85205p-5, -0x1.e723726b824ccp-4, 0x1.6d5a95d0a2785p-3,
+       -0x1.1c2a02bef3ca7p-3, 0x1.6d5a95fbf61c9p-5, 0x1.e7235f9ad34fdp-7,
+       -0x1.3ca32f0bf75bap-6, 0x1.36b86f8f5629dp-8, 0x1.37856e26c43c5p-9,
+       -0x1.d1fb032fd3658p-10, 0x1.28ab82b6ede4p-12},
+      {0x1.612d893085125p-6, -0x1.499d478bca742p-4, 0x1.0bcfca2194c2fp-3,
+       -0x1.d6631e1a5458cp-4, 0x1.974c0377acd1bp-5, 0x1.17d416544c048p-9,
+       -0x1.d8577cd47d84bp-7, 0x1.953fd61239f87p-8, 0x1.30e61f779ae49p-11,
+       -0x1.7860aa19f2f2bp-10, 0x1.c51c72e186623p-12},
+      {0x1.b4be201caa4b4p-7, -0x1.b055303221047p-5, 0x1.7a4a8a2bdde0cp-4,
+       -0x1.7148c3d5c7dcdp-4, 0x1.8a0da55ba5b6bp-5, -0x1.b2226b1319984p-8,
+       -0x1.25b2e37b0cfccp-7, 0x1.8d05b0147e821p-8, -0x1.7cb80598faceep-11,
+       -0x1.dafe289fe6dc7p-11, 0x1.ddb02c85e22bp-12},
+      {0x1.0678442cc256fp-7, -0x1.12ceb37ff9bf6p-5, 0x1.01a1c847fb207p-4,
+       -0x1.143d1c6f98f22p-4, 0x1.5a316537f539fp-5, -0x1.779b26f6c96ap-7,
+       -0x1.0d08a4de0f7bfp-8, 0x1.42f4a3c27232cp-8, -0x1.767cfe2314aa8p-10,
+       -0x1.7b505388abf2ep-12, 0x1.77461c55da518p-12},
+      {0x1.328f5ec350e67p-8, -0x1.529b9e8cf9a5ap-6, 0x1.529b9e8cfac67p-5,
+       -0x1.8b0ae3a4c97a8p-5, 0x1.1a2c59817055ap-5, -0x1.ace74450e5a3dp-7,
+       -0x1.e18d6c7410b76p-12, 0x1.badd28800e644p-9, -0x1.a13c9922134f3p-10,
+       0x1.fff76a75ead39p-15, 0x1.b16cf61ec2dd2p-13},
+      {0x1.5bde729a6b60fp-9, -0x1.94624e78e0f9cp-7, 0x1.ada873606e8ccp-6,
+       -0x1.0ea475da23b64p-5, 0x1.afe553effdf82p-6, -0x1.9973b1e16eba8p-7,
+       0x1.dd772aa53255dp-10, 0x1.ea1c42db10d19p-10, -0x1.6b818e3750232p-10,
+       0x1.43cd402d760d3p-12, 0x1.f62a9b9e2b79ap-15},
+      {0x1.7f713f9cc9784p-10, -0x1.d4143a9dfe98bp-8, 0x1.074b60f8df85ep-6,
+       -0x1.63ef61e8360c2p-6, 0x1.38a98329164bp-6, -0x1.5d3b17ac398f3p-7,
+       0x1.7cade7b6e3c8fp-9, 0x1.5f912c77e0c9cp-11, -0x1.065a339fc525fp-10,
+       0x1.8d01ba06413c9p-12, -0x1.9f30fd338427fp-16},
+      {0x1.9a7c305336484p-11, -0x1.06918b635560ap-8, 0x1.37ccd585f4d01p-7,
+       -0x1.c1ec102e05347p-7, 0x1.ae5961566fe56p-7, -0x1.11dae36755887p-7,
+       0x1.982ad75641d89p-9, -0x1.0244739cd9c83p-13, -0x1.3801f7b1ef4b1p-11,
+       0x1.67a01f83d42a7p-12, -0x1.31172079a63b8p-14},
+      {0x1.aab859b20ac9ep-12, -0x1.1d83170fbf6e2p-9, 0x1.64e3dcd3aece2p-8,
+       -0x1.119da0c45b13cp-7, 0x1.1a89b97a2fc97p-7, -0x1.90e812044d8cap-8,
+       0x1.6ecdb1263d601p-9, -0x1.1c5f1d9c7ca86p-11, -0x1.11689ad5d48efp-12,
+       0x1.06ad3d0e7893cp-12, -0x1.4e2c3615c4c33p-14},
+      {0x1.aeb4423e690e7p-13, -0x1.2ce8988092463p-10, 0x1.8af14828c08d6p-9,
+       -0x1.407fbd190e60ep-8, 0x1.62d4c6daf915dp-8, -0x1.146c4c13fcf6fp-8,
+       0x1.267f5f7f4e4eap-9, -0x1.650055947628fp-11, -0x1.1e4228a58be1fp-15,
+       0x1.39f3a7152bf5cp-13, -0x1.10ca401654daap-14},
+      {0x1.a609f7584d32cp-14, -0x1.3360ccd23db6ep-11, 0x1.a6a519a1163e9p-10,
+       -0x1.69cf466d146e5p-9, 0x1.ab0c2749ee609p-9, -0x1.693598016257dp-9,
+       0x1.b275b08a70196p-10, -0x1.52bf5b6c25969p-11, 0x1.77f30b334c832p-14,
+       0x1.1bec6f126d145p-14, -0x1.6dbabae75ba64p-15},
+      {0x1.916f7c5f2f764p-15, -0x1.30538fbb77ed9p-12, 0x1.b5781e9d7d3cap-11,
+       -0x1.89e17c0789d3ep-10, 0x1.ed4ac7eadf2eap-10, -0x1.c11f2966da996p-10,
+       0x1.2add54b32a56fp-10, -0x1.152601eff3211p-11, 0x1.1d5fb50237f1fp-13,
+       0x1.bf38de649c0e2p-17, -0x1.8eb6ab7d24271p-16},
+      {0x1.729df6503422ap-16, -0x1.2408e9ba3329cp-13, 0x1.b60d5e974e1eap-12,
+       -0x1.9db74b1d7834bp-11, 0x1.11c85b29f8906p-10, -0x1.0a7b56efdf1dbp-10,
+       0x1.82f2829bcc4edp-11, -0x1.9994395c8ff4ep-12, 0x1.1b5192cb88934p-13,
+       -0x1.09940ee068919p-16, -0x1.2c1365d1eeeafp-17},
+      {0x1.4c144d984e1b8p-17, -0x1.0f9e1b4dd36ffp-14, 0x1.a8670aa99bd8cp-13,
+       -0x1.a3737e2a8fd11p-12, 0x1.24544f0f00592p-11, -0x1.2e7e780322a4p-11,
+       0x1.da49c094d9cbep-12, -0x1.1771c29da6d5bp-12, 0x1.d365b26dc63d5p-14,
+       -0x1.bc9452efb4d81p-16, -0x1.5242e0d796318p-23},
+      {0x1.20c1303550f0ep-18, -0x1.e9b5e8d00cebdp-16, 0x1.8de3cd290bcfdp-14,
+       -0x1.9aa489e41acc7p-13, 0x1.2c7d5efa0a9dap-12, -0x1.490a4e85f7136p-12,
+       0x1.145484bf5179cp-12, -0x1.6483263df6976p-13, 0x1.56bc58061dacep-14,
+       -0x1.b9b157530f4f7p-16, 0x1.02372ad94723bp-18},
+      {0x1.e749309831666p-20, -0x1.abe09e9144b66p-17, 0x1.690585ca9211bp-15,
+       -0x1.84522fe886259p-14, 0x1.298f8d468914cp-13, -0x1.5775779f5bfffp-13,
+       0x1.330aade71b1bdp-13, -0x1.ac99f25149457p-14, 0x1.cc1e74a8fb3a7p-15,
+       -0x1.655efd3cca343p-16, 0x1.4240672f45e3ap-18},
+      {0x1.8ef2a9a18d856p-21, -0x1.6a597219a9351p-18, 0x1.3d0e43d671ac5p-16,
+       -0x1.62ccea633f2b9p-15, 0x1.1c07720a51ea6p-14, -0x1.586bad828f8a8p-14,
+       0x1.46150d2e2e10ap-14, -0x1.e82257fc23d66p-15, 0x1.1f2f71c915ddfp-15,
+       -0x1.fde2430bd9393p-17, 0x1.1bae788edb33fp-18},
+      {0x1.3ce784b41192fp-22, -0x1.296a70f413f1p-19, 0x1.0d88765d2c61cp-17,
+       -0x1.394b1fa52107cp-16, 0x1.05760aa9f7bebp-15, -0x1.4c1fdf0066b74p-15,
+       0x1.4b97a44c00f95p-15, -0x1.08551293d164dp-15, 0x1.508819ed4456bp-16,
+       -0x1.4b804729d60a7p-17, 0x1.a5cb84361b63bp-19},
+      {0x1.e87470e4f4241p-24, -0x1.d9371e2ff7808p-21, 0x1.bba3ac4ce4ecdp-19,
+       -0x1.0b6a7b0ce3205p-17, 0x1.d06f57d98111dp-17, -0x1.3436b32f16b84p-16,
+       0x1.4356e15a6fef4p-16, -0x1.1101e1b521bc5p-16, 0x1.74858f952f9d1p-17,
+       -0x1.8fa532c898418p-18, 0x1.1834c46dcd2e3p-19},
+      {0x1.6d3126d74b6c5p-25, -0x1.6ce1aa3fd75f4p-22, 0x1.617a9cedbd5d7p-20,
+       -0x1.b95fa394dd25dp-19, 0x1.8e1fc35332691p-18, -0x1.137164825dc8ap-17,
+       0x1.2eb15f12aec0cp-17, -0x1.0d7ad988a5dc6p-17, 0x1.873ebecd2a922p-18,
+       -0x1.c34d814f00735p-19, 0x1.5563fbf293d8cp-20},
+  };
+
+  using FPBits = typename fputil::FPBits<float>;
+  FPBits xbits(x);
+  uint32_t x_abs = xbits.abs().uintval();
+
+  constexpr size_t N_ERFCF_EXCEPTS = 2;
+  constexpr fputil::ExceptValues<float, N_ERFCF_EXCEPTS> ERFCF_EXCEPTS = {{
+      // (input, RZ output, RU offset, RD offset, RN offset)
+      // x = 0.000014103805, erfc(x) = 0.9999841...
+      {0x376C9F62, 0x3f7ffef5, 1, 0, 0},
+      // x = -0.000014103805, erfc(x) = 1.0000159...
+      {0xB76C9F62, 0x3F800085, 1, 0, 0},
+  }};
+
+  if (auto r = ERFCF_EXCEPTS.lookup(xbits.uintval());
+      LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+
+  // Special cases: NaN and Inf
+  if (LIBC_UNLIKELY(x_abs >= 0x7f800000U)) {
+    if (x_abs > 0x7f800000U) {
+      if (xbits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+      return x;
+    }
+    // erfc(+Inf) = 0, erfc(-Inf) = 2
+    return xbits.is_neg() ? fputil::cast<float>(2.0) : fputil::cast<float>(0.0);
+  }
+
+  if (LIBC_UNLIKELY(x_abs == 0))
+    return 1.0f;
+
+  if (LIBC_UNLIKELY(x_abs < 0x33000000U)) { // |x| < 2^-25
+    constexpr double NEG_TWO_OVER_SQRT_PI = -0x1.20dd750429b6dp0;
+    double xd = fputil::cast<double>(x);
+    return fputil::cast<float>(
+        fputil::multiply_add(xd, NEG_TWO_OVER_SQRT_PI, 1.0));
+  }
+
+  // erfc(x) rounds to 0 or 2 for |x| >= 10.125.
+  if (LIBC_UNLIKELY(x_abs >= 0x41220000U)) { // |x| >= 10.125
+    if (xbits.is_neg()) {
+      if (fputil::fenv_is_round_down() || fputil::fenv_is_round_to_zero())
+        return FPBits(0x3FFFFFFFU).get_val(); // next float below 2.0
+      return 2.0f;
+    }
+    fputil::set_errno_if_required(ERANGE);
+    fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
+    if (fputil::fenv_is_round_up())
+      return FPBits::min_subnormal().get_val();
+    return 0.0f;
+  }
+
+  // Polynomial approximation:
+  //   erfc(x) ~ erfc(|x|)      if x >= 0
+  //   erfc(x) ~ 2 - erfc(|x|)  if x < 0
+
+  // erfc(|x|) is evaluated using a degree-10 polynomial on each sub-interval.
+
+  int shift = 147 - (x_abs >> 23);
+  uint64_t mantissa = (x_abs & 0x007FFFFFU) | 0x00800000U;
+  int idx = static_cast<int>(mantissa >> shift);
+
+  if (idx >= 32) {
+    // For |x| >= 4.0, we use an asymptotic expansion in terms of t = 1/x^2.
+    // This accurately computes the steep decay of erfc over the domain
+    // [4.0, 10.125].
+    double xd = fputil::cast<double>(xbits.abs().get_val());
+    double xsq = xd * xd;
+
+    // Asymptotic expansion (Minimax polynomial of degree 12 in 1/x^2):
+    // Generated by Sollya over [4, 10]:
+    // P = fpminimax(erfc(1/sqrt(t)) * (1/sqrt(t)) * exp(1/t) * sqrt(pi), 12,
+    //               [|D...|], [1/100, 1/16], relative);
+    constexpr double ASYMP_COEFFS[13] = {
+        0x1.ffffffffff7ebp-1,  -0x1.fffffffde50adp-2, 0x1.7fffff052b044p-1,
+        -0x1.dfffbad06922cp0,  0x1.a3f9ab8151a5cp2,   -0x1.d817a02e83042p4,
+        0x1.426451515fe5ap7,   -0x1.f9da3719338bep9,  0x1.a3bffa01808f1p12,
+        -0x1.477cd9ce158b1p15, 0x1.a0922e919de1bp17,  -0x1.699343d88926ep19,
+        0x1.35159d114ab16p20,
+    };
+    double t = 1.0 / xsq;
+    double t2 = t * t;
+    double t4 = t2 * t2;
+    double t8 = t4 * t4;
+
+    double p01 = fputil::multiply_add(t, ASYMP_COEFFS[1], ASYMP_COEFFS[0]);
+    double p23 = fputil::multiply_add(t, ASYMP_COEFFS[3], ASYMP_COEFFS[2]);
+    double p45 = fputil::multiply_add(t, ASYMP_COEFFS[5], ASYMP_COEFFS[4]);
+    double p67 = fputil::multiply_add(t, ASYMP_COEFFS[7], ASYMP_COEFFS[6]);
+    double p89 = fputil::multiply_add(t, ASYMP_COEFFS[9], ASYMP_COEFFS[8]);
+    double p10_11 = fputil::multiply_add(t, ASYMP_COEFFS[11], ASYMP_COEFFS[10]);
+
+    double p03 = fputil::multiply_add(t2, p23, p01);
+    double p47 = fputil::multiply_add(t2, p67, p45);
+    double p8_11 = fputil::multiply_add(t2, p10_11, p89);
+
+    double p07 = fputil::multiply_add(t4, p47, p03);
+    double p8_12 = fputil::multiply_add(t4, ASYMP_COEFFS[12], p8_11);
+
+    double p = fputil::multiply_add(t8, p8_12, p07);
+
+    // Lookup table for exp(-k * ln(2) / 128) generated by Sollya.
+    constexpr double EXP_TABLE[128] = {0x1p+0,
----------------
lntue wrote:

expand `0x1.0p+0` to match the length of the rest of them should let clang-format to use less lines here.

https://github.com/llvm/llvm-project/pull/190061


More information about the libc-commits mailing list