[libc-commits] [libc] [libc][math] Improve the performance of sqrtf128. (PR #122578)
Nick Desaulniers via libc-commits
libc-commits at lists.llvm.org
Tue Jan 14 09:35:44 PST 2025
================
@@ -1,20 +1,355 @@
//===-- Implementation of sqrtf128 function -------------------------------===//
//
+// Copyright (c) 2024 Alexei Sibidanov <sibid at uvic.ca>
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/math/sqrtf128.h"
-#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/uint128.h"
namespace LIBC_NAMESPACE_DECL {
+using FPBits = fputil::FPBits<float128>;
+
+namespace {
+
+template <typename T, typename U = T> static inline constexpr T prod_hi(T, U); // Declared only; the (T, U) pairs in use are defined as explicit specializations below.
+
+// Get high part of integer multiplications: bits [64, 128) of the full unsigned product x * y.
+// Use template to prevent implicit conversion.
+template <>
+inline constexpr uint64_t prod_hi<uint64_t>(uint64_t x, uint64_t y) {
+ return static_cast<uint64_t>(
+ (static_cast<UInt128>(x) * static_cast<UInt128>(y)) >> 64); // operands are widened to UInt128 before multiplying, then the upper half is kept
+}
+
+// Get high part of unsigned 128x64 bit multiplication: (y * x) >> 64, i.e. the top 128 bits of the 192-bit product. Note the 128-bit operand comes first.
+template <>
+inline constexpr UInt128 prod_hi<UInt128, uint64_t>(UInt128 y, uint64_t x) {
+ uint64_t y_lo = static_cast<uint64_t>(y); // low 64 bits of y
+ uint64_t y_hi = static_cast<uint64_t>(y >> 64); // high 64 bits of y
+ UInt128 xyl = static_cast<UInt128>(x) * static_cast<UInt128>(y_lo); // partial product with weight 2^0
+ UInt128 xyh = static_cast<UInt128>(x) * static_cast<UInt128>(y_hi); // partial product with weight 2^64
+ return xyh + (xyl >> 64); // exact: the discarded low 64 bits of xyl are precisely the bits below the 2^64 cut
+}
+
+// Get high part of signed 64x64 bit multiplication: the upper 64 bits of the 128-bit signed product x * y.
+template <> inline constexpr int64_t prod_hi<int64_t>(int64_t x, int64_t y) {
+ return static_cast<int64_t>(
+ (static_cast<Int128>(x) * static_cast<Int128>(y)) >> 64); // NOTE(review): presumably >> on a negative Int128 is an arithmetic shift -- confirm for the Int128 type in use
+}
+
+// Get high 128-bit part of unsigned 128x128 bit multiplication (truncated: never above, and at most 2 below, the exact high half).
+template <> inline constexpr UInt128 prod_hi<UInt128>(UInt128 x, UInt128 y) {
+ uint64_t x_lo = static_cast<uint64_t>(x);
+ uint64_t x_hi = static_cast<uint64_t>(x >> 64);
+ uint64_t y_lo = static_cast<uint64_t>(y);
+ uint64_t y_hi = static_cast<uint64_t>(y >> 64);
+
+ UInt128 xh_yh = static_cast<UInt128>(x_hi) * static_cast<UInt128>(y_hi); // weight 2^128
+ UInt128 xh_yl = static_cast<UInt128>(x_hi) * static_cast<UInt128>(y_lo); // weight 2^64
+ UInt128 xl_yh = static_cast<UInt128>(x_lo) * static_cast<UInt128>(y_hi); // weight 2^64; x_lo * y_lo is never computed
+
+ xh_yh += xh_yl >> 64; // only the high half of each cross term is accumulated
+
+ return xh_yh + (xl_yh >> 64); // carries out of the discarded low halves are ignored; NOTE(review): presumably callers tolerate this error -- confirm
+}
+
+// Get high 128-bit part of mixed sign 128x128 bit multiplication.
+template <>
+inline constexpr Int128 prod_hi<Int128, UInt128>(Int128 x, UInt128 y) {
----------------
nickdesaulniers wrote:
So this overload is for mixed signedness of parameters... and the parameter order matters. That seems error-prone; I hope we can't implicitly convert from `UInt128` to `Int128` by accident.
https://github.com/llvm/llvm-project/pull/122578
More information about the libc-commits
mailing list