[libc-commits] [libc] [libc][math][c23] Add f16divf C23 math function (PR #96131)

Mon Jun 24 15:36:02 PDT 2024

================
@@ -0,0 +1,180 @@
+//===-- Division of IEEE 754 floating-point numbers -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_DIV_H
+#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_DIV_H
+
+#include "hdr/fenv_macros.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+
+namespace LIBC_NAMESPACE::fputil::generic {
+
+template <typename OutType, typename InType>
+cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+                     cpp::is_floating_point_v<InType> &&
+                     sizeof(OutType) <= sizeof(InType),
+                 OutType>
+div(InType x, InType y) {
+  using OutFPBits = FPBits<OutType>;
+  using OutStorageType = typename OutFPBits::StorageType;
+  using InFPBits = FPBits<InType>;
+  using InStorageType = typename InFPBits::StorageType;
+  using DyadicFloat =
+      DyadicFloat<cpp::bit_ceil(static_cast<size_t>(InFPBits::FRACTION_LEN))>;
+  using DyadicMantissaType = typename DyadicFloat::MantissaType;
+
+  // +1 for the implicit bit.
+  constexpr int DYADIC_EXTRA_MANTISSA_LEN =
+      DyadicMantissaType::BITS - (InFPBits::FRACTION_LEN + 1);
+  // +1 for the extra fractional bit in q.
+  constexpr int Q_EXTRA_FRACTION_LEN =
+      InFPBits::FRACTION_LEN + 1 - OutFPBits::FRACTION_LEN;
+
+  InFPBits x_bits(x);
+  InFPBits y_bits(y);
+
+  if (x_bits.is_nan() || y_bits.is_nan()) {
+    if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan())
----------------
overmighty wrote:

Performance test results for the current version (https://github.com/llvm/llvm-project/pull/96131/commits/6439428c4007ff8df13a5c31cb9c37e5ecdb5e07) vs. the version at https://github.com/llvm/llvm-project/pull/96131/commits/4ff508d039ac82b9489ece2337231cff259deb8f:

```
 Performance tests with inputs in denormal range:
-- My function --
     Total time      : 224276882 ns 
     Average runtime : 44.8553 ns/op 
     Ops per second  : 22293893 op/s 
-- Other function --
     Total time      : 520018166 ns 
     Average runtime : 104.004 ns/op 
     Ops per second  : 9615058 op/s 
-- Average runtime ratio --
     Mine / Other's  : 0.431287 

 Performance tests with inputs in normal range:
-- My function --
     Total time      : 506187486 ns 
     Average runtime : 101.237 ns/op 
     Ops per second  : 9877772 op/s 
-- Other function --
     Total time      : 642170120 ns 
     Average runtime : 128.434 ns/op 
     Ops per second  : 7786106 op/s 
-- Average runtime ratio --
     Mine / Other's  : 0.788245 

 Performance tests with inputs in normal range with exponents close to each other:
-- My function --
     Total time      : 303552644 ns 
     Average runtime : 60.7105 ns/op 
     Ops per second  : 16471623 op/s 
-- Other function --
     Total time      : 513769699 ns 
     Average runtime : 102.754 ns/op 
     Ops per second  : 9731996 op/s 
-- Average runtime ratio --
     Mine / Other's  : 0.590834 
```


https://github.com/llvm/llvm-project/pull/96131