[libc-commits] [libc] [libc][math][c23] Add f16divf C23 math function (PR #96131)
via libc-commits
libc-commits at lists.llvm.org
Thu Jun 20 08:16:15 PDT 2024
================
@@ -0,0 +1,180 @@
+//===-- Division of IEEE 754 floating-point numbers -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_DIV_H
+#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_DIV_H
+
+#include "hdr/fenv_macros.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+
+namespace LIBC_NAMESPACE::fputil::generic {
+
+template <typename OutType, typename InType>
+cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+ cpp::is_floating_point_v<InType> &&
+ sizeof(OutType) <= sizeof(InType),
+ OutType>
+div(InType x, InType y) {
+ using OutFPBits = FPBits<OutType>;
+ using OutStorageType = typename OutFPBits::StorageType;
+ using InFPBits = FPBits<InType>;
+ using InStorageType = typename InFPBits::StorageType;
+ using DyadicFloat =
+ DyadicFloat<cpp::bit_ceil(static_cast<size_t>(InFPBits::FRACTION_LEN))>;
+ using DyadicMantissaType = typename DyadicFloat::MantissaType;
+
+ // +1 for the implicit bit.
+ constexpr int DYADIC_EXTRA_MANTISSA_LEN =
+ DyadicMantissaType::BITS - (InFPBits::FRACTION_LEN + 1);
+ // +1 for the extra fractional bit in q.
+ constexpr int Q_EXTRA_FRACTION_LEN =
+ InFPBits::FRACTION_LEN + 1 - OutFPBits::FRACTION_LEN;
+
+ InFPBits x_bits(x);
+ InFPBits y_bits(y);
+
+ if (x_bits.is_nan() || y_bits.is_nan()) {
+ if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan())
+ raise_except_if_required(FE_INVALID);
+
+ // TODO: Handle NaN payloads.
+ return OutFPBits::quiet_nan().get_val();
+ }
+
+ Sign result_sign = x_bits.sign() == y_bits.sign() ? Sign::POS : Sign::NEG;
+
+ if (x_bits.is_inf()) {
+ if (y_bits.is_inf()) {
+ raise_except_if_required(FE_INVALID);
+ return OutFPBits::quiet_nan().get_val();
+ }
+
+ return OutFPBits::inf(result_sign).get_val();
+ }
+
+ if (y_bits.is_inf())
+ return OutFPBits::inf(result_sign).get_val();
+
+ if (y_bits.is_zero()) {
+ if (x_bits.is_zero()) {
+ raise_except_if_required(FE_INVALID);
+ return OutFPBits::quiet_nan().get_val();
+ }
+
+ raise_except_if_required(FE_DIVBYZERO);
+ return OutFPBits::inf(result_sign).get_val();
+ }
+
+ if (x_bits.is_zero())
+ return OutFPBits::zero(result_sign).get_val();
+
+ DyadicFloat xd(x);
+ DyadicFloat yd(y);
+
+ bool would_q_be_subnormal = xd.mantissa < yd.mantissa;
+ int q_exponent = xd.get_unbiased_exponent() - yd.get_unbiased_exponent() -
+ would_q_be_subnormal;
+
+ if (q_exponent + OutFPBits::EXP_BIAS >= OutFPBits::MAX_BIASED_EXPONENT) {
+ switch (quick_get_round()) {
+ case FE_TONEAREST:
+ case FE_UPWARD:
+ return OutFPBits::inf().get_val();
+ default:
+ return OutFPBits::max_normal().get_val();
+ }
+ }
+
+ if (q_exponent < -OutFPBits::EXP_BIAS - OutFPBits::FRACTION_LEN) {
+ switch (quick_get_round()) {
+ case FE_UPWARD:
+ return OutFPBits::min_subnormal().get_val();
+ default:
+ return OutFPBits::zero().get_val();
+ }
+ }
+
+ InStorageType q = 1;
+ InStorageType xd_mant_in = static_cast<InStorageType>(
+ xd.mantissa >> (DYADIC_EXTRA_MANTISSA_LEN - would_q_be_subnormal));
+ InStorageType yd_mant_in =
+ static_cast<InStorageType>(yd.mantissa >> DYADIC_EXTRA_MANTISSA_LEN);
+ InStorageType r = xd_mant_in - yd_mant_in;
+
+ for (size_t i = 0; i < InFPBits::FRACTION_LEN + 1; i++) {
----------------
overmighty wrote:
`OutFPBits::FRACTION_LEN + 1` is not enough. For example, take x = 0x1.758p-138 and y = 0x1.ffd14cp-127. The result should be:
```
> round(0x1.758p-138 / 0x1.ffd14cp-127, HP, RN);
1.0111010111_2 * 2^(-12)
```
However, we would have q = 101110101101, which has the rounding bit set but since there is no sticky bit it would round to 1.0111010110_2 * 2^(-12) with `FE_TONEAREST`.
https://github.com/llvm/llvm-project/pull/96131
More information about the libc-commits
mailing list