# [libc-commits] [libc] 82d6e77 - [libc] Implement tanf function correctly rounded for all rounding modes.

Tue Ly via libc-commits libc-commits at lists.llvm.org
Fri Aug 12 06:21:19 PDT 2022

Author: Tue Ly
Date: 2022-08-12T09:21:05-04:00
New Revision: 82d6e7704899ee4f9265680e38465f6e4da9c223

URL: https://github.com/llvm/llvm-project/commit/82d6e7704899ee4f9265680e38465f6e4da9c223
DIFF: https://github.com/llvm/llvm-project/commit/82d6e7704899ee4f9265680e38465f6e4da9c223.diff

LOG: [libc] Implement tanf function correctly rounded for all rounding modes.

Implement tanf function correctly rounded for all rounding modes.

We use the range reduction that is shared with sinf, cosf, and sincosf:

k = round(x * 32/pi) and y = x * (32/pi) - k.

Then we use the tangent of sum formula:

tan(x) = tan((k + y)* pi/32) = tan((k mod 32) * pi / 32 + y * pi/32)
= (tan((k mod 32) * pi/32) + tan(y * pi/32)) / (1 - tan((k mod 32) * pi/32) * tan(y * pi/32))

We need to make a further reduction when k mod 32 >= 16 due to the pole at pi/2 of tan(x) function:

if (k mod 32 >= 16): k = k - 31, y = y - 1.0

And to compute the final result, we store tan(k * pi/32) for k = -15..15 in a table of 32 double values,
and evaluate tan(y * pi/32) with a degree-11 minimax odd polynomial generated by Sollya with:

>  P = fpminimax(tan(y * pi/32)/y, [|0, 2, 4, 6, 8, 10|], [|D...|], [0, 1.5]);


Performance benchmark using perf tool from the CORE-MATH project on Ryzen 1700:

$CORE_MATH_PERF_MODE="rdtsc" ./perf.sh tanf CORE-MATH reciprocal throughput : 18.586 System LIBC reciprocal throughput : 50.068 LIBC reciprocal throughput : 33.823 LIBC reciprocal throughput : 25.161 (with -msse4.2 flag) LIBC reciprocal throughput : 19.157 (with -mfma flag)$ CORE_MATH_PERF_MODE="rdtsc" ./perf.sh tanf --latency
GNU libc version: 2.31
GNU libc release: stable
CORE-MATH latency   : 55.630
System LIBC latency : 106.264

LIBC latency        : 96.060
LIBC latency        : 90.727    (with -msse4.2 flag)
LIBC latency        : 82.361    (with -mfma flag)


Reviewed By: orex

Differential Revision: https://reviews.llvm.org/D131715

libc/src/math/generic/tanf.cpp
libc/src/math/tanf.h
libc/test/src/math/exhaustive/tanf_test.cpp
libc/test/src/math/tanf_test.cpp

Modified:
libc/config/darwin/arm/entrypoints.txt
libc/config/linux/aarch64/entrypoints.txt
libc/config/linux/x86_64/entrypoints.txt
libc/docs/math.rst
libc/src/math/CMakeLists.txt
libc/src/math/generic/CMakeLists.txt
libc/src/math/generic/sincosf_utils.h
libc/test/src/math/CMakeLists.txt
libc/test/src/math/exhaustive/CMakeLists.txt

Removed:

################################################################################
diff  --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
--- a/libc/config/darwin/arm/entrypoints.txt
+++ b/libc/config/darwin/arm/entrypoints.txt
@@ -192,6 +192,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.sqrt
libc.src.math.sqrtf
libc.src.math.sqrtl
+    libc.src.math.tanf
libc.src.math.tanhf
libc.src.math.trunc
libc.src.math.truncf

diff  --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index e7271fc60d7eb..2b85644b674bc 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -211,6 +211,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.sqrt
libc.src.math.sqrtf
libc.src.math.sqrtl
+    libc.src.math.tanf
libc.src.math.tanhf
libc.src.math.trunc
libc.src.math.truncf

diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 585df9eba6efa..80285f81dfc3a 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -219,6 +219,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.sqrtf
libc.src.math.sqrtl
libc.src.math.tan
+    libc.src.math.tanf
libc.src.math.tanhf
libc.src.math.trunc
libc.src.math.truncf

diff  --git a/libc/docs/math.rst b/libc/docs/math.rst
--- a/libc/docs/math.rst
+++ b/libc/docs/math.rst
@@ -143,7 +143,7 @@ sin            |check|          |check|
sincos         |check|          |check|
sinh           |check|
sqrt           |check|          |check|         |check|
-tan
+tan            |check|
tanh           |check|
tgamma
============== ================ =============== ======================
@@ -169,6 +169,7 @@ sin            |check|          large
sincos         |check|          large
sinh           |check|
sqrt           |check|          |check|         |check|
+tan            |check|
tanh           |check|
============== ================ =============== ======================

@@ -236,6 +237,8 @@ Performance
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
| sinhf        |        23 |                64 |        73 |               141 | :math:[-10, 10]                   | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA           |
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
+| tanf         |        19 |                50 |        82 |               107 | :math:[-\pi, \pi]                 | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA           |
++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
| tanhf        |        25 |                59 |        95 |               125 | :math:[-10, 10]                   | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA           |
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+

diff  --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 9f693074c8533..b801a08516257 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt

diff  --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index cd027a0693516..3d7b7b58562e6 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
-O3
)

+  tanf
+  SRCS
+    tanf.cpp
+  HDRS
+    ../tanf.h
+  DEPENDS
+    .range_reduction
+    libc.include.math
+    libc.src.errno.errno
+    libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.fma
+    libc.src.__support.FPUtil.polyeval
+  COMPILE_OPTIONS
+    -O3
+)
+
fabs
SRCS

diff  --git a/libc/src/math/generic/sincosf_utils.h b/libc/src/math/generic/sincosf_utils.h
index ebb248749f573..38ee2784de5a9 100644
--- a/libc/src/math/generic/sincosf_utils.h
+++ b/libc/src/math/generic/sincosf_utils.h
@@ -88,7 +88,7 @@ static inline void sincosf_eval(double xd, uint32_t x_abs, double &sin_k,
sin_y =
y * fputil::polyeval(ysq, 0x1.921fb54442d18p-4, -0x1.4abbce625abb1p-13,
0x1.466bc624f2776p-24, -0x1.32c3a619d4a7ep-36);
-  // Degree-8 minimax even polynomial for cos(y*pi/32) generated by Sollya with:
+  // Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya with:
// > P = fpminimax(cos(x*pi/32), [|0, 2, 4, 6|], [|1, D...|], [0, 0.5]);
// Note that cosm1_y = cos(y*pi/32) - 1.
cosm1_y = ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be430bp-8,

diff  --git a/libc/src/math/generic/tanf.cpp b/libc/src/math/generic/tanf.cpp
new file mode 100644
index 0000000000000..42a2d49f45eeb
--- /dev/null
+++ b/libc/src/math/generic/tanf.cpp
@@ -0,0 +1,239 @@
+//===-- Single-precision tan function -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/tanf.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/common.h"
+
+#include <errno.h>
+
+#if defined(LIBC_TARGET_HAS_FMA)
+#include "range_reduction_fma.h"
+// using namespace __llvm_libc::fma;
+using __llvm_libc::fma::FAST_PASS_BOUND;
+using __llvm_libc::fma::large_range_reduction;
+using __llvm_libc::fma::small_range_reduction;
+#else
+#include "range_reduction.h"
+// using namespace __llvm_libc::generic;
+using __llvm_libc::generic::FAST_PASS_BOUND;
+using __llvm_libc::generic::large_range_reduction;
+using __llvm_libc::generic::small_range_reduction;
+#endif
+
+namespace __llvm_libc {
+
+// Lookup table for tan(k * pi/32) with k = -15..15 organized as follow:
+//   TAN_K_OVER_32[k] = tan(k * pi/32) for k = 0..15
+//   TAN_K_OVER_32[k] = tan((k - 31) * pi/32) for k = 16..31.
+// This organization allows us to simply do the lookup:
+//   TAN_K_OVER_32[k & 31] for k of type int(32/64) with 2-complement
+// representation.
+// The values of tan(k * pi/32) are generated by Sollya with:
+//   for k from 0 -15 to 15 do { round(tan(k*pi/32), D, RN); };
+static constexpr double TAN_K_PI_OVER_32[32] = {
+    0.0000000000000000,    0x1.936bb8c5b2da2p-4,  0x1.975f5e0553158p-3,
+    0x1.36a08355c63dcp-2,  0x1.a827999fcef32p-2,  0x1.11ab7190834ecp-1,
+    0x1.561b82ab7f99p-1,   0x1.a43002ae4285p-1,   0x1.0000000000000p0,
+    0x1.37efd8d87607ep0,   0x1.7f218e25a7461p0,   0x1.def13b73c1406p0,
+    0x1.3504f333f9de6p1,   0x1.a5f59e90600ddp1,   0x1.41bfee2424771p2,
+    0x1.44e6c595afdccp3,   -0x1.44e6c595afdccp3,  -0x1.41bfee2424771p2,
+    -0x1.a5f59e90600ddp1,  -0x1.3504f333f9de6p1,  -0x1.def13b73c1406p0,
+    -0x1.7f218e25a7461p0,  -0x1.37efd8d87607ep0,  -0x1.0000000000000p0,
+    -0x1.a43002ae4285p-1,  -0x1.561b82ab7f99p-1,  -0x1.11ab7190834ecp-1,
+    -0x1.a827999fcef32p-2, -0x1.36a08355c63dcp-2, -0x1.975f5e0553158p-3,
+    -0x1.936bb8c5b2da2p-4, 0.0000000000000000,
+};
+
+// Exceptional cases for tanf.
+static constexpr int TANF_EXCEPTS = 6;
+
+static constexpr fputil::ExceptionalValues<float, TANF_EXCEPTS> TanfExcepts{
+    /* inputs */ {
+        0x531d744c, // x = 0x1.3ae898p39
+        0x57d7b0ed, // x = 0x1.af61dap48
+        0x65ee8695, // x = 0x1.dd0d2ap76
+        0x6798fe4f, // x = 0x1.31fc9ep80
+        0x6ad36709, // x = 0x1.a6ce12p86
+        0x72b505bb, // x = 0x1.6a0b76p102
+    },
+    /* outputs (RZ, RU offset, RD offset, RN offset) */
+    {
+        {0x4591ea1e, 1, 0, 1}, // x = 0x1.3ae898p39, tan(x) = 0x1.23d43cp12 (RZ)
+        {0x3eb068e3, 1, 0, 1}, // x = 0x1.af61dap48, tan(x) = 0x1.60d1c6p-2 (RZ)
+        {0xcaa32f8e, 0, 1,
+         0}, // x = 0x1.dd0d2ap76, tan(x) = -0x1.465f1cp22 (RZ)
+        {0x461e09f7, 1, 0, 0}, // x = 0x1.31fc9ep80, tan(x) = 0x1.3c13eep13 (RZ)
+        {0xbf62b097, 0, 1,
+         0}, // x = 0x1.a6ce12p86, tan(x) = -0x1.c5612ep-1 (RZ)
+        {0xbff2150f, 0, 1,
+         0}, // x = 0x1.6a0b76p102, tan(x) = -0x1.e42a1ep0 (RZ)
+    }};
+
+LLVM_LIBC_FUNCTION(float, tanf, (float x)) {
+  using FPBits = typename fputil::FPBits<float>;
+  FPBits xbits(x);
+  constexpr double SIGN[2] = {1.0, -1.0};
+  double x_sign = SIGN[xbits.uintval() >> 31];
+
+  xbits.set_sign(false);
+  uint32_t x_abs = xbits.uintval();
+
+  // Range reduction:
+  //
+  // Since tan(x) is an odd function,
+  //   tan(x) = -tan(-x),
+  // By replacing x with -x if x is negative, we can assume in the following
+  // that x is non-negative.
+  //
+  // We perform a range reduction mod pi/32, so that we ca have a good
+  // polynomial approximation of tan(x) around [-pi/32, pi/32].  Since tan(x) is
+  // periodic with period pi, in the first step of range reduction, we find k
+  // and y such that:
+  //   x = (k + y) * pi/32,
+  //   where k is an integer, and |y| <= 0.5.
+  // Moreover, we only care about the lowest 5 bits of k, since
+  //   tan((k + 32) * pi/32) = tan(k * pi/32 + pi) = tan(k * pi/32).
+  // So after the reduction k = k & 31, we can assume that 0 <= k <= 31.
+  //
+  // For the second step, since tan(x) has a singularity at pi/2, we need a
+  // further reduction so that:
+  //   k * pi/32 < pi/2, or equivalently, 0 <= k < 16.
+  // So if k >= 16, we perform the following transformation:
+  //   tan(x) = tan(x - pi) = tan((k + y) * pi/32 - pi)
+  //          = tan((k - 31 + y - 1) * pi/32)
+  //          = tan((k - 31) * pi/32 + (y - 1) * pi/32)
+  //          = tan(k' * pi/32 + y' * pi/32)
+  // Notice that we only subtract k by 31, not 32, to make sure that |k'| < 16.
+  // In fact, the range of k' is: -15 <= k' <= 0.
+  // But the range of y' is now: -1.5 <= y' <= -0.5.
+  // If we perform round to zero in the first step of finding k and y, so that
+  //   0 <= y <= 1, then the range of y' would be -1 <= y' <= 0, then we can
+  // reduce the degree of polynomial approximation using to approximate
+  // tan(y* pi/32) by 1 or 2 terms.
+  // In any case, for simplicity and to reuse the same range reduction as sinf
+  // and cosf, we opted to use the former range: [-1.5, 1.5] * pi/32 for
+  // the polynomial approximation step.
+  //
+  // Once k and y are computed, we then deduce the answer by the tangent of sum
+  // formula:
+  //   tan(x) = tan((k + y)*pi/32)
+  //          = (tan(y*pi/32) + tan(k*pi/32)) / (1 - tan(y*pi/32)*tan(k*pi/32))
+  // The values of tan(k*pi/32) for k = -15..15 are precomputed and stored using
+  // a vector of 31 doubles. Tan(y*pi/32) is computed using degree-9 minimax
+  // polynomials generated by Sollya.
+
+  // |x| < pi/32
+  if (unlikely(x_abs <= 0x3dc9'0fdbU)) {
+    double xd = static_cast<double>(x);
+
+    // |x| < 0x1.0p-12f
+    if (unlikely(x_abs < 0x3980'0000U)) {
+      if (unlikely(x_abs == 0U)) {
+        // For signed zeros.
+        return x;
+      }
+      // When |x| < 2^-12, the relative error of the approximation tan(x) ~ x
+      // is:
+      //   |tan(x) - x| / |tan(x)| < |x^3| / (3|x|)
+      //                           = x^2 / 3
+      //                           < 2^-25
+      //                           < epsilon(1)/2.
+      // So the correctly rounded values of tan(x) are:
+      //   = x + sign(x)*eps(x) if rounding mode = FE_UPWARD and x is positive,
+      //                        or (rounding mode = FE_DOWNWARD and x is
+      //                        negative),
+      //   = x otherwise.
+      // To simplify the rounding decision and make it more efficient, we use
+      //   fma(x, 2^-25, x) instead.
+      // Note: to use the formula x + 2^-25*x to decide the correct rounding, we
+      // do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when
+      // |x| < 2^-125. For targets without FMA instructions, we simply use
+      // double for intermediate results as it is more efficient than using an
+      // emulated version of FMA.
+#if defined(LIBC_TARGET_HAS_FMA)
+#else
+#endif // LIBC_TARGET_HAS_FMA
+    }
+
+    // |x| < pi/32
+    double xsq = xd * xd;
+
+    // Degree-9 minimax odd polynomial of tan(x) generated by Sollya with:
+    // > P = fpminimax(tan(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/32]);
+    double result =
+        fputil::polyeval(xsq, 1.0, 0x1.555555553d022p-2, 0x1.111111ce442c1p-3,
+                         0x1.ba180a6bbdecdp-5, 0x1.69c0a88a0b71fp-6);
+    return xd * result;
+  }
+
+  // Inf or NaN
+  if (unlikely(x_abs >= 0x7f80'0000U)) {
+    if (x_abs == 0x7f80'0000U) {
+      errno = EDOM;
+      fputil::set_except(FE_INVALID);
+    }
+    return x +
+           FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
+  }
+
+  int64_t k;
+  double y;
+  double xd = static_cast<double>(xbits.get_val());
+
+  // Perform the first step of range reduction: find k and y such that
+  //   x = (k + y) * pi/32,
+  //   where k is an integer, and |y| <= 0.5.
+  if (likely(x_abs < FAST_PASS_BOUND)) {
+    k = small_range_reduction(xd, y);
+  } else {
+
+    using ExceptChecker =
+        typename fputil::ExceptionChecker<float, TANF_EXCEPTS>;
+    {
+      float result;
+      if (ExceptChecker::check_odd_func(TanfExcepts, x_abs, x_sign <= 0.0,
+                                        result))
+        return result;
+    }
+
+    fputil::FPBits<float> x_bits(x_abs);
+    k = large_range_reduction(xd, x_bits.get_exponent(), y);
+  }
+
+  // Only care about the lowest 5 bits of k.
+  k &= 31;
+  // Adjust y if k >= 16.
+  constexpr double ADJUSTMENT[2] = {0.0, -1.0};
+  y += ADJUSTMENT[k >> 4];
+
+  double tan_k = TAN_K_PI_OVER_32[k];
+
+  // Degree-10 minimax odd polynomial for tan(y * pi/32)/y generated by Sollya
+  // with:
+  // > P = fpminimax(tan(y*pi/32)/y, [|0, 2, 4, 6, 8, 10|], [|D...|], [0, 1.5]);
+  double ysq = y * y;
+  double tan_y =
+      y * fputil::polyeval(ysq, 0x1.921fb54442d17p-4, 0x1.4abbce625e84cp-12,
+                           0x1.466bc669afd51p-20, 0x1.460013a5aae3p-28,
+
+  // Combine the results with the tangent of sum formula:
+  //   tan(x) = tan((k + y)*pi/32)
+  //          = (tan(k*pi/32) + tan(k*pi/32)) / (1 - tan(y*pi/32)*tan(k*pi/32))
+  return x_sign * (tan_y + tan_k) / fputil::multiply_add(tan_y, -tan_k, 1.0);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/math/tanf.h b/libc/src/math/tanf.h
new file mode 100644
index 0000000000000..7a9fa01f4c296
--- /dev/null
+++ b/libc/src/math/tanf.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for tanf --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_TANF_H
+#define LLVM_LIBC_SRC_MATH_TANF_H
+
+namespace __llvm_libc {
+
+float tanf(float x);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_TANF_H

diff  --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index c5cfd819db8f4..e51ae24428ae7 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
libc.src.__support.FPUtil.fputil
)

+  tanf_test
+  NEED_MPFR
+  SUITE
+    libc_math_unittests
+  SRCS
+    tanf_test.cpp
+  HDRS
+    sdcomp26094.h
+  DEPENDS
+    libc.include.errno
+    libc.src.math.tanf
+    libc.src.__support.CPP.array
+    libc.src.__support.FPUtil.fputil
+)
+
fabs_test
NEED_MPFR

diff  --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index c7e073e77348a..ab80e157e3f34 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
)

+  tanf_test
+  NO_RUN_POSTBUILD
+  NEED_MPFR
+  SUITE
+    libc_math_exhaustive_tests
+  SRCS
+    tanf_test.cpp
+  DEPENDS
+    .exhaustive_test
+    libc.include.math
+    libc.src.math.tanf
+    libc.src.__support.FPUtil.fputil
+)
+
expf_test
NO_RUN_POSTBUILD

diff  --git a/libc/test/src/math/exhaustive/tanf_test.cpp b/libc/test/src/math/exhaustive/tanf_test.cpp
new file mode 100644
index 0000000000000..c1c53eed698f9
--- /dev/null
+++ b/libc/test/src/math/exhaustive/tanf_test.cpp
@@ -0,0 +1,84 @@
+//===-- Exhaustive test for tanf
+//--------------------------------------std::cout----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/tanf.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/FPMatcher.h"
+
+
+using FPBits = __llvm_libc::fputil::FPBits<float>;
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+static constexpr int TOLERANCE = 3;
+
+struct LlvmLibcTanfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
+  bool check(uint32_t start, uint32_t stop,
+             mpfr::RoundingMode rounding) override {
+    mpfr::ForceRoundingMode r(rounding);
+    uint32_t bits = start;
+    bool result = true;
+    int tol = TOLERANCE;
+    do {
+      FPBits xbits(bits);
+      float x = float(xbits);
+      bool r = EXPECT_MPFR_MATCH(mpfr::Operation::Tan, x, __llvm_libc::tanf(x),
+                                 0.5, rounding);
+      result &= r;
+      if (!r)
+        --tol;
+      if (!tol)
+        break;
+    } while (++bits < stop);
+    return result;
+  }
+};
+
+// Range: [0, +Inf);
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcTanfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
+  test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcTanfExhaustiveTest, PostiveRangeRoundUp) {
+  test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcTanfExhaustiveTest, PostiveRangeRoundDown) {
+  test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcTanfExhaustiveTest, PostiveRangeRoundTowardZero) {
+  test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::TowardZero);
+}
+
+// Range: (-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcTanfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
+  test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcTanfExhaustiveTest, NegativeRangeRoundUp) {
+  test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcTanfExhaustiveTest, NegativeRangeRoundDown) {
+  test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcTanfExhaustiveTest, NegativeRangeRoundTowardZero) {
+  test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::TowardZero);
+}

diff  --git a/libc/test/src/math/tanf_test.cpp b/libc/test/src/math/tanf_test.cpp
new file mode 100644
index 0000000000000..29ae84825006f
--- /dev/null
+++ b/libc/test/src/math/tanf_test.cpp
@@ -0,0 +1,127 @@
+//===-- Unittests for tanf ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/tanf.h"
+#include "test/src/math/sdcomp26094.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/FPMatcher.h"
+#include "utils/UnitTest/Test.h"
+#include <math.h>
+
+#include <errno.h>
+#include <stdint.h>
+
+using __llvm_libc::testing::SDCOMP26094_VALUES;
+using FPBits = __llvm_libc::fputil::FPBits<float>;
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+DECLARE_SPECIAL_CONSTANTS(float)
+
+TEST(LlvmLibcTanfTest, SpecialNumbers) {
+  errno = 0;
+
+  EXPECT_FP_EQ(aNaN, __llvm_libc::tanf(aNaN));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(0.0f, __llvm_libc::tanf(0.0f));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(-0.0f, __llvm_libc::tanf(-0.0f));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(aNaN, __llvm_libc::tanf(inf));
+  EXPECT_MATH_ERRNO(EDOM);
+
+  EXPECT_FP_EQ(aNaN, __llvm_libc::tanf(neg_inf));
+  EXPECT_MATH_ERRNO(EDOM);
+}
+
+TEST(LlvmLibcTanfTest, InFloatRange) {
+  constexpr uint32_t COUNT = 1000000;
+  constexpr uint32_t STEP = UINT32_MAX / COUNT;
+  for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
+    float x = float(FPBits(v));
+    if (isnan(x) || isinf(x))
+      continue;
+    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tan, x,
+                                   __llvm_libc::tanf(x), 0.5);
+  }
+}
+
+TEST(LlvmLibcTanfTest, SpecificBitPatterns) {
+  constexpr int N = 48;
+  constexpr uint32_t INPUTS[N] = {
+      0x3a7a'8d2fU, // x = 0x1.f51a5ep-11f
+      0x3f06'0a92U, // x = pi/6
+      0x3f3a'dc51U, // x = 0x1.75b8a2p-1f
+      0x3f49'0fdbU, // x = pi/4
+      0x3f86'0a92U, // x = pi/3
+      0x3fa7'832aU, // x = 0x1.4f0654p+0f
+      0x3fc9'0fdbU, // x = pi/2
+      0x4017'1973U, // x = 0x1.2e32e6p+1f
+      0x4049'0fdbU, // x = pi
+      0x4096'cbe4U, // x = 0x1.2d97c8p+2f
+      0x40c9'0fdbU, // x = 2*pi
+      0x433b'7490U, // x = 0x1.76e92p+7f
+      0x437c'e5f1U, // x = 0x1.f9cbe2p+7f
+      0x4619'9998U, // x = 0x1.33333p+13f
+      0x474d'246fU, // x = 0x1.9a48dep+15f
+      0x4afd'ece4U, // x = 0x1.fbd9c8p+22f
+      0x4c23'32e9U, // x = 0x1.4665d2p+25f
+      0x50a3'e87fU, // x = 0x1.47d0fep+34f
+      0x5239'47f6U, // x = 0x1.728fecp+37f
+      0x531d'744cU, // x = 0x1.3ae898p+39f
+      0x53b1'46a6U, // x = 0x1.628d4cp+40f
+      0x5532'5019U, // x = 0x1.64a032p+43f
+      0x55ca'fb2aU, // x = 0x1.95f654p+44f
+      0x588e'f060U, // x = 0x1.1de0cp+50f
+      0x5922'aa80U, // x = 0x1.4555p+51f
+      0x5aa4'542cU, // x = 0x1.48a858p+54f
+      0x5c07'bcd0U, // x = 0x1.0f79ap+57f
+      0x5ebc'fddeU, // x = 0x1.79fbbcp+62f
+      0x5f18'b878U, // x = 0x1.3170fp+63f
+      0x5fa6'eba7U, // x = 0x1.4dd74ep+64f
+      0x6115'cb11U, // x = 0x1.2b9622p+67f
+      0x61a4'0b40U, // x = 0x1.48168p+68f
+      0x6386'134eU, // x = 0x1.0c269cp+72f
+      0x6589'8498U, // x = 0x1.13093p+76f
+      0x65ee'8695U, // x = 0x1.dd0d2ap+76f
+      0x6600'0001U, // x = 0x1.000002p+77f
+      0x664e'46e4U, // x = 0x1.9c8dc8p+77f
+      0x66b0'14aaU, // x = 0x1.602954p+78f
+      0x6798'fe4fU, // x = 0x1.31fc9ep+80f
+      0x67a9'242bU, // x = 0x1.524856p+80f
+      0x6a19'76f1U, // x = 0x1.32ede2p+85f
+      0x6ad3'6709U, // x = 0x1.a6ce12p+86f
+      0x6c55'da58U, // x = 0x1.abb4bp+89f
+      0x6f79'be45U, // x = 0x1.f37c8ap+95f
+      0x7276'69d4U, // x = 0x1.ecd3a8p+101f
+      0x72b5'05bbU, // x = 0x1.6a0b76p+102f
+      0x7758'4625U, // x = 0x1.b08c4ap+111f
+      0x7bee'f5efU, // x = 0x1.ddebdep+120f
+  };
+
+  for (int i = 0; i < N; ++i) {
+    float x = float(FPBits(INPUTS[i]));
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tan, x,
+                                   __llvm_libc::tanf(x), 0.5);
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tan, -x,
+                                   __llvm_libc::tanf(-x), 0.5);
+  }
+}
+
+// SDCOMP-26094: check tanf in the cases for which the range reducer
+// returns values furthest beyond its nominal upper bound of pi/4.
+TEST(LlvmLibcTanfTest, SDCOMP_26094) {
+  for (uint32_t v : SDCOMP26094_VALUES) {
+    float x = float(FPBits(v));
+    ASSERT_MPFR_MATCH(mpfr::Operation::Tan, x, __llvm_libc::tanf(x), 0.5);
+  }
+}