[libc-commits] [libc] [libc][math] Implement fast pass for double precision pow function with up to 1ULP error. (PR #101926)
via libc-commits
libc-commits at lists.llvm.org
Mon Aug 5 08:14:11 PDT 2024
https://github.com/lntue updated https://github.com/llvm/llvm-project/pull/101926
>From 650f19099ccd2af87ce403623494d26e97c81589 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Mon, 5 Aug 2024 06:45:59 +0000
Subject: [PATCH 1/3] [libc][math] Implement fast pass for double precision pow
function with up to 1ULP error.
---
libc/config/darwin/arm/entrypoints.txt | 1 +
libc/config/linux/aarch64/entrypoints.txt | 1 +
libc/config/linux/arm/entrypoints.txt | 1 +
libc/config/linux/riscv/entrypoints.txt | 1 +
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/config/windows/entrypoints.txt | 1 +
libc/docs/math/index.rst | 2 +-
libc/src/math/amdgpu/CMakeLists.txt | 12 -
libc/src/math/amdgpu/pow.cpp | 21 -
libc/src/math/generic/CMakeLists.txt | 20 +
libc/src/math/generic/pow.cpp | 490 ++++++++++++++++++++++
libc/src/math/nvptx/CMakeLists.txt | 12 -
libc/src/math/nvptx/pow.cpp | 19 -
libc/test/src/math/CMakeLists.txt | 11 +
libc/test/src/math/pow_test.cpp | 114 +++++
libc/test/src/math/smoke/CMakeLists.txt | 10 +
libc/test/src/math/smoke/pow_test.cpp | 188 +++++++++
17 files changed, 840 insertions(+), 65 deletions(-)
delete mode 100644 libc/src/math/amdgpu/pow.cpp
create mode 100644 libc/src/math/generic/pow.cpp
delete mode 100644 libc/src/math/nvptx/pow.cpp
create mode 100644 libc/test/src/math/pow_test.cpp
create mode 100644 libc/test/src/math/smoke/pow_test.cpp
diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
index 3e4cb3cebce9b..dfbba63d769ee 100644
--- a/libc/config/darwin/arm/entrypoints.txt
+++ b/libc/config/darwin/arm/entrypoints.txt
@@ -227,6 +227,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nexttoward
libc.src.math.nexttowardf
libc.src.math.nexttowardl
+ libc.src.math.pow
libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index dff8f25142dd9..149c368338276 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -521,6 +521,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextup
libc.src.math.nextupf
libc.src.math.nextupl
+ libc.src.math.pow
libc.src.math.powf
libc.src.math.remainder
libc.src.math.remainderf
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 90aae962080cd..3dc8aca518305 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -350,6 +350,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextup
libc.src.math.nextupf
libc.src.math.nextupl
+ libc.src.math.pow
libc.src.math.powf
libc.src.math.remainder
libc.src.math.remainderf
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index 29969fce6adf8..15a6827c040c7 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -520,6 +520,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextup
libc.src.math.nextupf
libc.src.math.nextupl
+ libc.src.math.pow
libc.src.math.powf
libc.src.math.remainder
libc.src.math.remainderf
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 2ed287dc8542e..0c54fb267f855 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -520,6 +520,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nextup
libc.src.math.nextupf
libc.src.math.nextupl
+ libc.src.math.pow
libc.src.math.powf
libc.src.math.remainder
libc.src.math.remainderf
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index b7aac225ee055..d66e7f1fed81f 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -240,6 +240,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.nexttoward
libc.src.math.nexttowardf
libc.src.math.nexttowardl
+ libc.src.math.pow
libc.src.math.powf
libc.src.math.remainderf
libc.src.math.remainder
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index defd075d10997..14ef59eaa03f1 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -320,7 +320,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| logp1 | | | | | | 7.12.6.14 | F.10.3.14 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| pow | |check| | | | | | 7.12.7.5 | F.10.4.5 |
+| pow | |check| | 1 ULP | | | | 7.12.7.5 | F.10.4.5 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| powi\* | | | | | | | |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/src/math/amdgpu/CMakeLists.txt b/libc/src/math/amdgpu/CMakeLists.txt
index 2ceb12785c607..86785e56347e7 100644
--- a/libc/src/math/amdgpu/CMakeLists.txt
+++ b/libc/src/math/amdgpu/CMakeLists.txt
@@ -456,18 +456,6 @@ add_entrypoint_object(
VENDOR
)
-add_entrypoint_object(
- pow
- SRCS
- pow.cpp
- HDRS
- ../pow.h
- COMPILE_OPTIONS
- ${bitcode_link_flags}
- -O2
- VENDOR
-)
-
add_entrypoint_object(
powi
SRCS
diff --git a/libc/src/math/amdgpu/pow.cpp b/libc/src/math/amdgpu/pow.cpp
deleted file mode 100644
index 979ad6cda6ef0..0000000000000
--- a/libc/src/math/amdgpu/pow.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- Implementation of the pow function for GPU ------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/math/pow.h"
-#include "src/__support/common.h"
-
-#include "declarations.h"
-#include "src/__support/macros/config.h"
-
-namespace LIBC_NAMESPACE_DECL {
-
-LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
- return __ocml_pow_f64(x, y);
-}
-
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 58daffaf4e932..8d59f5de9c310 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1552,6 +1552,26 @@ add_entrypoint_object(
-O3
)
+add_entrypoint_object(
+ pow
+ SRCS
+ pow.cpp
+ HDRS
+ ../pow.h
+ DEPENDS
+ .common_constants
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.optimization
+ COMPILE_OPTIONS
+ -O3
+)
+
add_entrypoint_object(
copysign
SRCS
diff --git a/libc/src/math/generic/pow.cpp b/libc/src/math/generic/pow.cpp
new file mode 100644
index 0000000000000..49f3ca602c207
--- /dev/null
+++ b/libc/src/math/generic/pow.cpp
@@ -0,0 +1,490 @@
+//===-- Double-precision x^y function -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/pow.h"
+#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/FPUtil/sqrt.h" // Speedup for pow(x, 1/2) = sqrt(x)
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+using fputil::DoubleDouble;
+
+namespace {
+
+// Constants for log2(x) range reduction, generated by Sollya with:
+// > for i from 0 to 127 do {
+// r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^-7) );
+// b = nearestint(log2(r) * 2^41) * 2^-41;
+// c = round(log2(r) - b, D, RN);
+// print("{", -c, ",", -b, "},");
+// };
+// This is the same as -log2(RD[i]), with the least significant bits of the
+// high part set to be 2^-41, so that the sum of high parts + e_x is exact in
+// double precision.
+// We also replace the first and the last ones to be 0.
+constexpr DoubleDouble LOG2_R_DD[128] = {
+ {0.0, 0.0},
+ {-0x1.19b14945cf6bap-44, 0x1.72c7ba21p-7},
+ {-0x1.95539356f93dcp-43, 0x1.743ee862p-6},
+ {0x1.abe0a48f83604p-43, 0x1.184b8e4c5p-5},
+ {0x1.635577970e04p-43, 0x1.77394c9d9p-5},
+ {-0x1.401fbaaa67e3cp-45, 0x1.d6ebd1f2p-5},
+ {-0x1.5b1799ceaeb51p-43, 0x1.1bb32a6008p-4},
+ {0x1.7c407050799bfp-43, 0x1.4c560fe688p-4},
+ {0x1.da6339da288fcp-43, 0x1.7d60496cf8p-4},
+ {0x1.be4f6f22dbbadp-43, 0x1.960caf9ab8p-4},
+ {-0x1.c760bc9b188c4p-45, 0x1.c7b528b71p-4},
+ {0x1.164e932b2d51cp-44, 0x1.f9c95dc1dp-4},
+ {0x1.924ae921f7ecap-45, 0x1.097e38ce6p-3},
+ {-0x1.6d25a5b8a19b2p-44, 0x1.22dadc2ab4p-3},
+ {0x1.e50a1644ac794p-43, 0x1.3c6fb650ccp-3},
+ {0x1.f34baa74a7942p-43, 0x1.494f863b8cp-3},
+ {-0x1.8f7aac147fdc1p-46, 0x1.633a8bf438p-3},
+ {0x1.f84be19cb9578p-43, 0x1.7046031c78p-3},
+ {-0x1.66cccab240e9p-46, 0x1.8a8980abfcp-3},
+ {-0x1.3f7a55cd2af4cp-47, 0x1.97c1cb13c8p-3},
+ {0x1.3458cde69308cp-43, 0x1.b2602497d4p-3},
+ {-0x1.667f21fa8423fp-44, 0x1.bfc67a8p-3},
+ {0x1.d2fe4574e09b9p-47, 0x1.dac22d3e44p-3},
+ {0x1.367bde40c5e6dp-43, 0x1.e857d3d36p-3},
+ {0x1.d45da26510033p-46, 0x1.01d9bbcfa6p-2},
+ {-0x1.7204f55bbf90dp-44, 0x1.08bce0d96p-2},
+ {-0x1.d4f1b95e0ff45p-43, 0x1.169c05364p-2},
+ {0x1.c20d74c0211bfp-44, 0x1.1d982c9d52p-2},
+ {0x1.ad89a083e072ap-43, 0x1.249cd2b13cp-2},
+ {0x1.cd0cb4492f1bcp-43, 0x1.32bfee370ep-2},
+ {-0x1.2101a9685c779p-47, 0x1.39de8e155ap-2},
+ {0x1.9451cd394fe8dp-43, 0x1.4106017c3ep-2},
+ {0x1.661e393a16b95p-44, 0x1.4f6fbb2cecp-2},
+ {-0x1.c6d8d86531d56p-44, 0x1.56b22e6b58p-2},
+ {0x1.c1c885adb21d3p-43, 0x1.5dfdcf1eeap-2},
+ {0x1.3bb5921006679p-45, 0x1.6552b49986p-2},
+ {0x1.1d406db502403p-43, 0x1.6cb0f6865cp-2},
+ {0x1.55a63e278bad5p-43, 0x1.7b89f02cf2p-2},
+ {-0x1.66ae2a7ada553p-49, 0x1.8304d90c12p-2},
+ {-0x1.66cccab240e9p-45, 0x1.8a8980abfcp-2},
+ {-0x1.62404772a151dp-45, 0x1.921800924ep-2},
+ {0x1.ac9bca36fd02ep-44, 0x1.99b072a96cp-2},
+ {0x1.4bc302ffa76fbp-43, 0x1.a8ff97181p-2},
+ {0x1.01fea1ec47c71p-43, 0x1.b0b67f4f46p-2},
+ {-0x1.f20203b3186a6p-43, 0x1.b877c57b1cp-2},
+ {-0x1.2642415d47384p-45, 0x1.c043859e3p-2},
+ {-0x1.bc76a2753b99bp-50, 0x1.c819dc2d46p-2},
+ {-0x1.da93ae3a5f451p-43, 0x1.cffae611aep-2},
+ {-0x1.50e785694a8c6p-43, 0x1.d7e6c0abc4p-2},
+ {0x1.c56138c894641p-43, 0x1.dfdd89d586p-2},
+ {0x1.5669df6a2b592p-43, 0x1.e7df5fe538p-2},
+ {-0x1.ea92d9e0e8ac2p-48, 0x1.efec61b012p-2},
+ {0x1.a0331af2e6feap-43, 0x1.f804ae8d0cp-2},
+ {0x1.9518ce032f41dp-48, 0x1.0014332bep-1},
+ {-0x1.b3b3864c60011p-44, 0x1.042bd4b9a8p-1},
+ {-0x1.103e8f00d41c8p-45, 0x1.08494c66b9p-1},
+ {0x1.65be75cc3da17p-43, 0x1.0c6caaf0c5p-1},
+ {0x1.3676289cd3dd4p-43, 0x1.1096015deep-1},
+ {-0x1.41dfc7d7c3321p-43, 0x1.14c560fe69p-1},
+ {0x1.e0cda8bd74461p-44, 0x1.18fadb6e2dp-1},
+ {0x1.2a606046ad444p-44, 0x1.1d368296b5p-1},
+ {0x1.f9ea977a639cp-43, 0x1.217868b0c3p-1},
+ {-0x1.50520a377c7ecp-45, 0x1.25c0a0463cp-1},
+ {0x1.6e3cb71b554e7p-47, 0x1.2a0f3c3407p-1},
+ {-0x1.4275f1035e5e8p-48, 0x1.2e644fac05p-1},
+ {-0x1.4275f1035e5e8p-48, 0x1.2e644fac05p-1},
+ {-0x1.979a5db68721dp-45, 0x1.32bfee370fp-1},
+ {0x1.1ee969a95f529p-43, 0x1.37222bb707p-1},
+ {0x1.bb4b69336b66ep-43, 0x1.3b8b1c68fap-1},
+ {0x1.d5e6a8a4fb059p-45, 0x1.3ffad4e74fp-1},
+ {0x1.3106e404cabb7p-44, 0x1.44716a2c08p-1},
+ {0x1.3106e404cabb7p-44, 0x1.44716a2c08p-1},
+ {-0x1.9bcaf1aa4168ap-43, 0x1.48eef19318p-1},
+ {0x1.1646b761c48dep-44, 0x1.4d7380dcc4p-1},
+ {0x1.2f0c0bfe9dbecp-43, 0x1.51ff2e3021p-1},
+ {0x1.29904613e33cp-43, 0x1.5692101d9bp-1},
+ {0x1.1d406db502403p-44, 0x1.5b2c3da197p-1},
+ {0x1.1d406db502403p-44, 0x1.5b2c3da197p-1},
+ {-0x1.125d6cbcd1095p-44, 0x1.5fcdce2728p-1},
+ {-0x1.bd9b32266d92cp-43, 0x1.6476d98adap-1},
+ {0x1.54243b21709cep-44, 0x1.6927781d93p-1},
+ {0x1.54243b21709cep-44, 0x1.6927781d93p-1},
+ {-0x1.ce60916e52e91p-44, 0x1.6ddfc2a79p-1},
+ {0x1.f1f5ae718f241p-43, 0x1.729fd26b7p-1},
+ {-0x1.6eb9612e0b4f3p-43, 0x1.7767c12968p-1},
+ {-0x1.6eb9612e0b4f3p-43, 0x1.7767c12968p-1},
+ {0x1.fed21f9cb2cc5p-43, 0x1.7c37a9227ep-1},
+ {0x1.7f5dc57266758p-43, 0x1.810fa51bf6p-1},
+ {0x1.7f5dc57266758p-43, 0x1.810fa51bf6p-1},
+ {0x1.5b338360c2ae2p-43, 0x1.85efd062c6p-1},
+ {-0x1.96fc8f4b56502p-43, 0x1.8ad846cf37p-1},
+ {-0x1.96fc8f4b56502p-43, 0x1.8ad846cf37p-1},
+ {-0x1.bdc81c4db3134p-44, 0x1.8fc924c89bp-1},
+ {0x1.36c101ee1344p-43, 0x1.94c287492cp-1},
+ {0x1.36c101ee1344p-43, 0x1.94c287492cp-1},
+ {0x1.e41fa0a62e6aep-44, 0x1.99c48be206p-1},
+ {-0x1.d97ee9124773bp-46, 0x1.9ecf50bf44p-1},
+ {-0x1.d97ee9124773bp-46, 0x1.9ecf50bf44p-1},
+ {-0x1.3f94e00e7d6bcp-46, 0x1.a3e2f4ac44p-1},
+ {-0x1.6879fa00b120ap-43, 0x1.a8ff971811p-1},
+ {-0x1.6879fa00b120ap-43, 0x1.a8ff971811p-1},
+ {0x1.1659d8e2d7d38p-44, 0x1.ae255819fp-1},
+ {0x1.1e5e0ae0d3f8ap-43, 0x1.b35458761dp-1},
+ {0x1.1e5e0ae0d3f8ap-43, 0x1.b35458761dp-1},
+ {0x1.484a15babcf88p-43, 0x1.b88cb9a2abp-1},
+ {0x1.484a15babcf88p-43, 0x1.b88cb9a2abp-1},
+ {0x1.871a7610e40bdp-45, 0x1.bdce9dcc96p-1},
+ {-0x1.2d90e5edaeceep-43, 0x1.c31a27dd01p-1},
+ {-0x1.2d90e5edaeceep-43, 0x1.c31a27dd01p-1},
+ {-0x1.5dd31d962d373p-43, 0x1.c86f7b7ea5p-1},
+ {-0x1.5dd31d962d373p-43, 0x1.c86f7b7ea5p-1},
+ {-0x1.9ad57391924a7p-43, 0x1.cdcebd2374p-1},
+ {-0x1.3167ccc538261p-44, 0x1.d338120a6ep-1},
+ {-0x1.3167ccc538261p-44, 0x1.d338120a6ep-1},
+ {0x1.c7a4ff65ddbc9p-45, 0x1.d8aba045bp-1},
+ {0x1.c7a4ff65ddbc9p-45, 0x1.d8aba045bp-1},
+ {-0x1.f9ab3cf74babap-44, 0x1.de298ec0bbp-1},
+ {-0x1.f9ab3cf74babap-44, 0x1.de298ec0bbp-1},
+ {0x1.52842c1c1e586p-43, 0x1.e3b20546f5p-1},
+ {0x1.52842c1c1e586p-43, 0x1.e3b20546f5p-1},
+ {0x1.3c6764fc87b4ap-48, 0x1.e9452c8a71p-1},
+ {0x1.3c6764fc87b4ap-48, 0x1.e9452c8a71p-1},
+ {-0x1.a0976c0a2827dp-44, 0x1.eee32e2aedp-1},
+ {-0x1.a0976c0a2827dp-44, 0x1.eee32e2aedp-1},
+ {-0x1.a45314dc4fc42p-43, 0x1.f48c34bd1fp-1},
+ {-0x1.a45314dc4fc42p-43, 0x1.f48c34bd1fp-1},
+ {0x1.ef5d00e390ap-44, 0x1.fa406bd244p-1},
+ {0.0, 1.0},
+};
+
+bool is_odd_integer(double x) {
+ using FPBits = fputil::FPBits<double>;
+ FPBits xbits(x);
+ uint64_t x_u = xbits.uintval();
+ unsigned x_e = static_cast<unsigned>(xbits.get_biased_exponent());
+ unsigned lsb =
+ static_cast<unsigned>(cpp::countr_zero(x_u | FPBits::EXP_MASK));
+ constexpr unsigned UNIT_EXPONENT =
+ static_cast<unsigned>(FPBits::EXP_BIAS + FPBits::FRACTION_LEN);
+ return (x_e + lsb == UNIT_EXPONENT);
+}
+
+bool is_integer(double x) {
+ using FPBits = fputil::FPBits<double>;
+ FPBits xbits(x);
+ uint64_t x_u = xbits.uintval();
+ unsigned x_e = static_cast<unsigned>(xbits.get_biased_exponent());
+ unsigned lsb =
+ static_cast<unsigned>(cpp::countr_zero(x_u | FPBits::EXP_MASK));
+ constexpr unsigned UNIT_EXPONENT =
+ static_cast<unsigned>(FPBits::EXP_BIAS + FPBits::FRACTION_LEN);
+ return (x_e + lsb >= UNIT_EXPONENT);
+}
+
+} // namespace
+
+LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
+ using FPBits = fputil::FPBits<double>;
+
+ FPBits xbits(x), ybits(y);
+
+ bool x_sign = xbits.sign() == Sign::NEG;
+ bool y_sign = ybits.sign() == Sign::NEG;
+
+ FPBits x_abs = xbits.abs();
+ FPBits y_abs = ybits.abs();
+
+ uint64_t x_mant = xbits.get_mantissa();
+ uint64_t y_mant = ybits.get_mantissa();
+ uint64_t x_u = xbits.uintval();
+ uint64_t x_a = x_abs.uintval();
+ uint64_t y_a = y_abs.uintval();
+
+ double e_x = static_cast<double>(xbits.get_exponent());
+ uint64_t sign = 0;
+
+ ///////// BEGIN - Check exceptional cases ////////////////////////////////////
+
+ // The double precision number that is closest to 1 is (1 - 2^-53), which has
+ // log2(1 - 2^-53) ~ -1.715...p-53.
+ // So if |y| > 1075 / log2(1 - 2^-53), and x is finite:
+ // |y * log2(x)| = 0 or > 1075.
+ // Hence x^y will either overflow or underflow if x is not zero.
+ if (LIBC_UNLIKELY(y_mant == 0 || y_a > 0x43d74910d52d3052 ||
+ x_u == FPBits::one().uintval() ||
+ x_u >= FPBits::inf().uintval() ||
+ x_u < FPBits::min_normal().uintval())) {
+ // Exceptional exponents.
+ switch (y_a) {
+ case 0: // y = +-0.0
+ return 1.0;
+ case 0x3fe0'0000'0000'0000: // y = +-0.5
+ // TODO: speed up x^(-1/2) with rsqrt(x) when available.
+ return y_sign ? (1.0 / fputil::sqrt<double>(x)) : fputil::sqrt<double>(x);
+ case 0x3ff0'0000'0000'0000: // y = +-1.0
+ return y_sign ? (1.0 / x) : x;
+ case 0x4000'0000'0000'0000: // y = +-2.0;
+ return y_sign ? (1.0 / (x * x)) : (x * x);
+ }
+
+ if (y_a > 0x43d7'4910'd52d'3052) {
+ if (y_a >= 0x7ff0'0000'0000'0000) {
+ // y is inf or nan
+ if (y_mant != 0)
+ // y is NaN
+ // pow(1, NaN) = 1
+ // pow(x, NaN) = NaN
+ return (x_u == FPBits::one().uintval()) ? 1.0 : y;
+
+ // Now y is +-Inf
+ if (x_abs.is_nan())
+ // pow(NaN, +-Inf) = NaN
+ return x;
+
+ if (x_a == 0x3ff0'0000'0000'0000)
+ // pow(+-1, +-Inf) = 1.0
+ return 1.0;
+
+ if (x_a == 0 && y_sign) {
+ // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
+ fputil::set_errno_if_required(EDOM);
+ fputil::raise_except_if_required(FE_DIVBYZERO);
+ return FPBits::inf().get_val();
+ }
+ // pow (|x| < 1, -inf) = +inf
+ // pow (|x| < 1, +inf) = 0.0f
+ // pow (|x| > 1, -inf) = 0.0f
+ // pow (|x| > 1, +inf) = +inf
+ return ((x_a < FPBits::one().uintval()) == y_sign)
+ ? FPBits::inf().get_val()
+ : 0.0;
+ }
+ // x^y will be overflow / underflow in double precision. Set y to a
+ // large enough exponent but not too large, so that the computations
+ // won't be overflow in double precision.
+ y = y_sign ? -0x1.0p100 : 0x1.0p100;
+ }
+
+ // y is finite and non-zero.
+
+ if (x_u == FPBits::one().uintval())
+ // pow(1, y) = 1
+ return 1.0;
+
+ // TODO: Speed things up with pow(2, y) = exp2(y) and pow(10, y) = exp10(y).
+
+ if (x_a == 0) {
+ const bool out_is_neg = x_sign && is_odd_integer(y);
+ if (y_sign) {
+ // pow(0, negative number) = inf
+ fputil::set_errno_if_required(EDOM);
+ fputil::raise_except_if_required(FE_DIVBYZERO);
+ return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val();
+ }
+ // pow(0, positive number) = 0
+ return out_is_neg ? -0.0 : 0.0;
+ }
+
+ if (x_a == FPBits::inf().uintval()) {
+ const bool out_is_neg = x_sign && is_odd_integer(y);
+ if (y_sign)
+ return out_is_neg ? -0.0f : 0.0f;
+ return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val();
+ }
+
+ if (x_a > FPBits::inf().uintval()) {
+ // x is NaN.
+ // pow (aNaN, 0) is already taken care above.
+ return x;
+ }
+
+ // Normalize denormal inputs.
+ if (x_a < FPBits::min_normal().uintval()) {
+ e_x -= 64.0;
+ x_mant = FPBits(x * 0x1.0p64).get_mantissa();
+ }
+
+ // x is finite and negative, and y is a finite integer.
+ if (x_sign) {
+ if (is_integer(y)) {
+ x = -x;
+ if (is_odd_integer(y))
+ // sign = -1.0;
+ sign = 0x8000'0000'0000'0000ULL;
+ } else {
+ // pow( negative, non-integer ) = NaN
+ fputil::set_errno_if_required(EDOM);
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+ }
+ }
+
+ ///////// END - Check exceptional cases //////////////////////////////////////
+
+ // x^y = 2^( y * log2(x) )
+ // = 2^( y * ( e_x + log2(m_x) ) )
+ // First we compute log2(x) = e_x + log2(m_x)
+
+ // Extract exponent field of x.
+
+ // Use the highest 7 fractional bits of m_x as the index for look up tables.
+ unsigned idx_x = static_cast<unsigned>(x_mant >> (FPBits::FRACTION_LEN - 7));
+ // Add the hidden bit to the mantissa.
+ // 1 <= m_x < 2
+ FPBits m_x = FPBits(x_mant | 0x3FF0'0000'0000'0000);
+
+ // Reduced argument for log2(m_x):
+ // dx = r * m_x - 1.
+ // The computation is exact, and -2^-8 <= dx < 2^-7.
+ // Then m_x = (1 + dx) / r, and
+ // log2(m_x) = log2( (1 + dx) / r )
+ // = log2(1 + dx) - log2(r).
+ // Perform exact range reduction
+ double dx;
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+ dx = fputil::multiply_add(RD[idx_x], m_x.get_val(), -1.0); // Exact
+#else
+ double c = FPBits(m_x.uintval() & 0x3FFF'E000'0000'0000).get_val();
+ dx = fputil::multiply_add(RD[idx_x], m_x.get_val() - c, CD[idx_x]); // Exact
+#endif // LIBC_TARGET_CPU_HAS_FMA
+
+ // Degree-5 polynomial approximation:
+ // dx * P(dx) ~ log2(1 + dx)
+ // Generated by Sollya with:
+ // > P = fpminimax(log2(1 + x)/x, 5, [|D...|], [-2^-8, 2^-7]);
+ // > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]);
+ // 0x1.653...p-52
+ constexpr double COEFFS[] = {0x1.71547652b82fep0, -0x1.71547652b7a07p-1,
+ 0x1.ec709dc458db1p-2, -0x1.715479c2266c9p-2,
+ 0x1.2776ae1ddf8fp-2, -0x1.e7b2178870157p-3};
+
+ double dx2 = dx * dx; // Exact
+ double c0 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
+ double c1 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
+ double c2 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);
+
+ double p = fputil::polyeval(dx2, c0, c1, c2);
+
+ // s = e_x - log2(r) + dx * P(dx)
+ // Absolute error bound:
+ // |log2(x) - log2_x.hi - log2_x.lo| < 2^-58.
+ // Relative error bound:
+ // |(log2_x.hi + log2_x.lo)/log2(x) - 1| < 2^-51.
+ double log2_x_hi = e_x + LOG2_R_DD[idx_x].hi; // Exact
+ // Error
+ double log2_x_lo = fputil::multiply_add(dx, p, LOG2_R_DD[idx_x].lo);
+
+ DoubleDouble log2_x = fputil::exact_add(log2_x_hi, log2_x_lo);
+
+ // To compute 2^(y * log2(x)), we break the exponent into 3 parts:
+ // y * log(2) = hi + mid + lo, where
+ // hi is an integer
+ // mid * 2^6 is an integer
+ // |lo| <= 2^-7
+ // Then:
+ // x^y = 2^(y * log2(x)) = 2^hi * 2^mid * 2^lo,
+ // In which 2^mid is obtained from a look-up table of size 2^6 = 64 elements,
+ // and 2^lo ~ 1 + lo * P(lo).
+ // Thus, we have:
+ // hi + mid = 2^-6 * round( 2^6 * y * log2(x) )
+ // If we restrict the output such that |hi| < 150, (hi + mid) uses (8 + 6)
+ // bits, hence, if we use double precision to perform
+ // round( 2^6 * y * log2(x))
+ // the lo part is bounded by 2^-7 + 2^(-(52 - 14)) = 2^-7 + 2^-38
+
+ // In the following computations:
+ // y6 = 2^6 * y
+ // hm = 2^6 * (hi + mid) = round(2^6 * y * log2(x)) ~ round(y6 * s)
+ // lo6 = 2^6 * lo = 2^6 * (y - (hi + mid)) = y6 * log2(x) - hm.
+ double y6 = y * 0x1.0p6; // Exact.
+
+ DoubleDouble y6_log2_x = fputil::exact_mult(y6, log2_x.hi);
+ y6_log2_x.lo = fputil::multiply_add(y6, log2_x.lo, y6_log2_x.lo);
+
+ // Check overflow/underflow.
+ double scale = 1.0;
+
+ // |2^(hi + mid) - exp2_hi_mid| <= ulp(exp2_hi_mid) / 2
+ // Clamp the exponent part into smaller range that fits double precision.
+ // For those exponents that are out of range, the final conversion will round
+ // them correctly to inf/max float or 0/min float accordingly.
+ constexpr double UPPER_EXP_BOUND = 512.0 * 0x1.0p6;
+ if (LIBC_UNLIKELY(FPBits(y6_log2_x.hi).abs().get_val() >= UPPER_EXP_BOUND)) {
+ if (FPBits(y6_log2_x.hi).sign() == Sign::POS) {
+ scale = 0x1.0p512;
+ y6_log2_x.hi -= 512.0 * 64.0;
+ if (y6_log2_x.hi > 513.0 * 64.0)
+ y6_log2_x.hi = 513.0 * 64.0;
+ } else {
+ scale = 0x1.0p-512;
+ y6_log2_x.hi += 512.0 * 64.0;
+ if (y6_log2_x.hi < (-1076.0 + 512.0) * 64.0)
+ y6_log2_x.hi = -564.0 * 64.0;
+ }
+ }
+
+ double hm = fputil::nearest_integer(y6_log2_x.hi);
+
+ // lo6 = 2^6 * lo.
+ double lo6_hi = y6_log2_x.hi - hm;
+ double lo6 = lo6_hi + y6_log2_x.lo;
+
+ int hm_i = static_cast<int>(hm);
+ unsigned idx_y = static_cast<unsigned>(hm_i) & 0x3f;
+
+ // 2^hi
+ int64_t exp2_hi_i = static_cast<int64_t>(
+ static_cast<uint64_t>(static_cast<int64_t>(hm_i >> 6))
+ << FPBits::FRACTION_LEN);
+ // 2^mid
+ int64_t exp2_mid_hi_i =
+ static_cast<int64_t>(FPBits(EXP2_MID1[idx_y].hi).uintval());
+ int64_t exp2_mid_lo_i =
+ static_cast<int64_t>(FPBits(EXP2_MID1[idx_y].mid).uintval());
+ // (-1)^sign * 2^hi * 2^mid
+ // Error <= 2^hi * 2^-53
+ uint64_t exp2_hm_hi_i =
+ static_cast<uint64_t>(exp2_hi_i + exp2_mid_hi_i) + sign;
+ // The low part could be 0.
+ uint64_t exp2_hm_lo_i =
+ idx_y != 0 ? static_cast<uint64_t>(exp2_hi_i + exp2_mid_lo_i) + sign
+ : sign;
+ double exp2_hm_hi = FPBits(exp2_hm_hi_i).get_val();
+ double exp2_hm_lo = FPBits(exp2_hm_lo_i).get_val();
+
+ // Degree-5 polynomial approximation P(lo6) ~ 2^(lo6 / 2^6) = 2^(lo).
+ // Generated by Sollya with:
+ // > P = fpminimax(2^(x/64), 5, [|1, D...|], [-2^-1, 2^-1]);
+ // > dirtyinfnorm(2^(x/64) - P, [-0.5, 0.5]);
+ // 0x1.a2b77e618f5c4c176fd11b7659016cde5de83cb72p-60
+ constexpr double EXP2_COEFFS[] = {0x1p0,
+ 0x1.62e42fefa39efp-7,
+ 0x1.ebfbdff82a23ap-15,
+ 0x1.c6b08d7076268p-23,
+ 0x1.3b2ad33f8b48bp-31,
+ 0x1.5d870c4d84445p-40};
+
+ double lo6_sqr = lo6 * lo6;
+
+ double d0 = fputil::multiply_add(lo6, EXP2_COEFFS[2], EXP2_COEFFS[1]);
+ double d1 = fputil::multiply_add(lo6, EXP2_COEFFS[4], EXP2_COEFFS[3]);
+ double pp = fputil::polyeval(lo6_sqr, d0, d1, EXP2_COEFFS[5]);
+
+ double r = fputil::multiply_add(exp2_hm_hi * lo6, pp, exp2_hm_lo);
+ r += exp2_hm_hi;
+
+ return r * scale;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/nvptx/CMakeLists.txt b/libc/src/math/nvptx/CMakeLists.txt
index 4295ebf9ff630..96a0c3c03ff8e 100644
--- a/libc/src/math/nvptx/CMakeLists.txt
+++ b/libc/src/math/nvptx/CMakeLists.txt
@@ -409,18 +409,6 @@ add_entrypoint_object(
VENDOR
)
-add_entrypoint_object(
- pow
- SRCS
- pow.cpp
- HDRS
- ../pow.h
- COMPILE_OPTIONS
- ${bitcode_link_flags}
- -O2
- VENDOR
-)
-
add_entrypoint_object(
powi
SRCS
diff --git a/libc/src/math/nvptx/pow.cpp b/libc/src/math/nvptx/pow.cpp
deleted file mode 100644
index c51dd56f55387..0000000000000
--- a/libc/src/math/nvptx/pow.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-//===-- Implementation of the pow function for GPU ------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/math/pow.h"
-#include "src/__support/common.h"
-
-#include "declarations.h"
-#include "src/__support/macros/config.h"
-
-namespace LIBC_NAMESPACE_DECL {
-
-LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) { return __nv_pow(x, y); }
-
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 2fbfa741c037e..ecc8ff139edcc 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -2045,6 +2045,17 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)
+add_fp_unittest(
+ pow_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ pow_test.cpp
+ DEPENDS
+ libc.src.math.pow
+)
+
add_fp_unittest(
powf_test
NEED_MPFR
diff --git a/libc/test/src/math/pow_test.cpp b/libc/test/src/math/pow_test.cpp
new file mode 100644
index 0000000000000..52416e9610c3e
--- /dev/null
+++ b/libc/test/src/math/pow_test.cpp
@@ -0,0 +1,114 @@
+//===-- Unittests for pow ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/math_macros.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/pow.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+using LlvmLibcPowTest = LIBC_NAMESPACE::testing::FPTest<double>;
+using LIBC_NAMESPACE::testing::tlog;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+TEST_F(LlvmLibcPowTest, TrickyInputs) {
+ constexpr mpfr::BinaryInput<double> INPUTS[] = {
+ {0x1.0853408534085p-2, 0x1.0D148E03BCBA8p-1},
+ {0x1.65FBD65FBD657p-1, 0x1.F10D148E03BB6p+1},
+ };
+
+ for (auto input : INPUTS) {
+ double x = input.x;
+ double y = input.y;
+ EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input,
+ LIBC_NAMESPACE::pow(x, y), 0.5);
+ }
+}
+
+TEST_F(LlvmLibcPowTest, InFloatRange) {
+ constexpr uint64_t X_COUNT = 123;
+ constexpr uint64_t X_START = FPBits(0.25).uintval();
+ constexpr uint64_t X_STOP = FPBits(4.0).uintval();
+ constexpr uint64_t X_STEP = (X_STOP - X_START) / X_COUNT;
+
+ constexpr uint64_t Y_COUNT = 137;
+ constexpr uint64_t Y_START = FPBits(0.25).uintval();
+ constexpr uint64_t Y_STOP = FPBits(4.0).uintval();
+ constexpr uint64_t Y_STEP = (Y_STOP - Y_START) / Y_COUNT;
+
+ auto test = [&](mpfr::RoundingMode rounding_mode) {
+ mpfr::ForceRoundingMode __r(rounding_mode);
+ if (!__r.success)
+ return;
+
+ uint64_t fails = 0;
+ uint64_t count = 0;
+ uint64_t cc = 0;
+ double mx, my, mr = 0.0;
+ double tol = 1.5;
+
+ for (uint64_t i = 0, v = X_START; i <= X_COUNT; ++i, v += X_STEP) {
+ double x = FPBits(v).get_val();
+ if (FPBits(x).is_inf_or_nan() || x < 0.0)
+ continue;
+
+ for (uint64_t j = 0, w = Y_START; j <= Y_COUNT; ++j, w += Y_STEP) {
+ double y = FPBits(w).get_val();
+ if (FPBits(y).is_inf_or_nan())
+ continue;
+
+ double result = LIBC_NAMESPACE::pow(x, y);
+ ++cc;
+ if (FPBits(result).is_inf_or_nan())
+ continue;
+
+ ++count;
+ mpfr::BinaryInput<double> inputs{x, y};
+
+ if (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Pow, inputs,
+ result, 1.5, rounding_mode)) {
+ ++fails;
+ while (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(
+ mpfr::Operation::Pow, inputs, result, tol, rounding_mode)) {
+ mx = x;
+ my = y;
+ mr = result;
+
+ if (tol > 1000.0)
+ break;
+
+ tol *= 2.0;
+ }
+ }
+ }
+ }
+ if (fails || (count < cc)) {
+ tlog << " Pow failed: " << fails << "/" << count << "/" << cc
+ << " tests.\n"
+ << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
+ }
+ if (fails) {
+ mpfr::BinaryInput<double> inputs{mx, my};
+ EXPECT_MPFR_MATCH(mpfr::Operation::Pow, inputs, mr, 1.5, rounding_mode);
+ }
+ };
+
+ tlog << " Test Rounding To Nearest...\n";
+ test(mpfr::RoundingMode::Nearest);
+
+ tlog << " Test Rounding Downward...\n";
+ test(mpfr::RoundingMode::Downward);
+
+ tlog << " Test Rounding Upward...\n";
+ test(mpfr::RoundingMode::Upward);
+
+ tlog << " Test Rounding Toward Zero...\n";
+ test(mpfr::RoundingMode::TowardZero);
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 2aba5abb5a4d8..f675b3c7814df 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -3695,6 +3695,16 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)
+add_fp_unittest(
+ pow_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ pow_test.cpp
+ DEPENDS
+ libc.src.math.pow
+)
+
add_fp_unittest(
powf_test
SUITE
diff --git a/libc/test/src/math/smoke/pow_test.cpp b/libc/test/src/math/smoke/pow_test.cpp
new file mode 100644
index 0000000000000..18ad88c0d893c
--- /dev/null
+++ b/libc/test/src/math/smoke/pow_test.cpp
@@ -0,0 +1,188 @@
+//===-- Unittests for pow -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/math_macros.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/pow.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcPowTest = LIBC_NAMESPACE::testing::FPTest<double>;
+using LIBC_NAMESPACE::fputil::testing::ForceRoundingMode;
+using LIBC_NAMESPACE::fputil::testing::RoundingMode;
+
+TEST_F(LlvmLibcPowTest, SpecialNumbers) {
+ constexpr double neg_odd_integer = -3.0;
+ constexpr double neg_even_integer = -6.0;
+ constexpr double neg_non_integer = -1.1;
+ constexpr double pos_odd_integer = 5.0;
+ constexpr double pos_even_integer = 8.0;
+ constexpr double pos_non_integer = 1.1;
+
+ for (int i = 0; i < N_ROUNDING_MODES; ++i) {
+ ForceRoundingMode __r(ROUNDING_MODES[i]);
+ if (!__r.success)
+ continue;
+
+ // pow( 0.0, exponent )
+ EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::pow(zero, neg_odd_integer),
+ FE_DIVBYZERO);
+ EXPECT_FP_EQ_WITH_EXCEPTION(
+ inf, LIBC_NAMESPACE::pow(zero, neg_even_integer), FE_DIVBYZERO);
+ EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::pow(zero, neg_non_integer),
+ FE_DIVBYZERO);
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(zero, pos_odd_integer));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(zero, pos_even_integer));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(zero, pos_non_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(zero, zero));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(zero, neg_zero));
+ EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::pow(zero, inf));
+ EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::pow(zero, neg_inf),
+ FE_DIVBYZERO);
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(zero, aNaN));
+
+ // pow( -0.0, exponent )
+ EXPECT_FP_EQ_WITH_EXCEPTION(
+ neg_inf, LIBC_NAMESPACE::pow(neg_zero, neg_odd_integer), FE_DIVBYZERO);
+ EXPECT_FP_EQ_WITH_EXCEPTION(
+ inf, LIBC_NAMESPACE::pow(neg_zero, neg_even_integer), FE_DIVBYZERO);
+ EXPECT_FP_EQ_WITH_EXCEPTION(
+ inf, LIBC_NAMESPACE::pow(neg_zero, neg_non_integer), FE_DIVBYZERO);
+ EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::pow(neg_zero, pos_odd_integer));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_zero, pos_even_integer));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_zero, pos_non_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(neg_zero, zero));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(neg_zero, neg_zero));
+ EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::pow(neg_zero, inf));
+ EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::pow(neg_zero, neg_inf),
+ FE_DIVBYZERO);
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(neg_zero, aNaN));
+
+ // pow( 1.0, exponent )
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, zero));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, neg_zero));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, 1.0));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, -1.0));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, neg_odd_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, neg_even_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, neg_non_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, pos_odd_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, pos_even_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, pos_non_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, inf));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, neg_inf));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, aNaN));
+
+ // pow( 1.0, exponent )
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, zero));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, neg_zero));
+ EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::pow(-1.0, 1.0));
+ EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::pow(-1.0, -1.0));
+ EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::pow(-1.0, neg_odd_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, neg_even_integer));
+ EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::pow(-1.0, neg_non_integer),
+ FE_INVALID);
+ EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::pow(-1.0, pos_odd_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, pos_even_integer));
+ EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::pow(-1.0, pos_non_integer),
+ FE_INVALID);
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, inf));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, neg_inf));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(-1.0, aNaN));
+
+ // pow( inf, exponent )
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(inf, zero));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(inf, neg_zero));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, 1.0));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, -1.0));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, neg_odd_integer));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, neg_even_integer));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, neg_non_integer));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, pos_odd_integer));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, pos_even_integer));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, pos_non_integer));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, inf));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, neg_inf));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(inf, aNaN));
+
+ // pow( -inf, exponent )
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(neg_inf, zero));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(neg_inf, neg_zero));
+ EXPECT_FP_EQ(neg_inf, LIBC_NAMESPACE::pow(neg_inf, 1.0));
+ EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::pow(neg_inf, -1.0));
+ EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::pow(neg_inf, neg_odd_integer));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_inf, neg_even_integer));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_inf, neg_non_integer));
+ EXPECT_FP_EQ(neg_inf, LIBC_NAMESPACE::pow(neg_inf, pos_odd_integer));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(neg_inf, pos_even_integer));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(neg_inf, pos_non_integer));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(neg_inf, inf));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_inf, neg_inf));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(neg_inf, aNaN));
+
+ // pow ( aNaN, exponent )
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(aNaN, zero));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(aNaN, neg_zero));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, 1.0));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, -1.0));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, neg_odd_integer));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, neg_even_integer));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, neg_non_integer));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, pos_odd_integer));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, pos_even_integer));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, pos_non_integer));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, inf));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, neg_inf));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, aNaN));
+
+ // pow ( base, inf )
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(0.1, inf));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(-0.1, inf));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(1.1, inf));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(-1.1, inf));
+
+ // pow ( base, -inf )
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(0.1, neg_inf));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(-0.1, neg_inf));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(1.1, neg_inf));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(-1.1, neg_inf));
+
+ // Exact powers of 2:
+ // TODO: Enable these tests when we use exp2.
+ // EXPECT_FP_EQ(0x1.0p15, LIBC_NAMESPACE::pow(2.0, 15.0));
+ // EXPECT_FP_EQ(0x1.0p126, LIBC_NAMESPACE::pow(2.0, 126.0));
+ // EXPECT_FP_EQ(0x1.0p-45, LIBC_NAMESPACE::pow(2.0, -45.0));
+ // EXPECT_FP_EQ(0x1.0p-126, LIBC_NAMESPACE::pow(2.0, -126.0));
+ // EXPECT_FP_EQ(0x1.0p-149, LIBC_NAMESPACE::pow(2.0, -149.0));
+
+ // Exact powers of 10:
+ // TODO: Enable these tests when we use exp10
+ // EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(10.0, 0.0));
+ // EXPECT_FP_EQ(10.0, LIBC_NAMESPACE::pow(10.0, 1.0));
+ // EXPECT_FP_EQ(100.0, LIBC_NAMESPACE::pow(10.0, 2.0));
+ // EXPECT_FP_EQ(1000.0, LIBC_NAMESPACE::pow(10.0, 3.0));
+ // EXPECT_FP_EQ(10000.0, LIBC_NAMESPACE::pow(10.0, 4.0));
+ // EXPECT_FP_EQ(100000.0, LIBC_NAMESPACE::pow(10.0, 5.0));
+ // EXPECT_FP_EQ(1000000.0, LIBC_NAMESPACE::pow(10.0, 6.0));
+ // EXPECT_FP_EQ(10000000.0, LIBC_NAMESPACE::pow(10.0, 7.0));
+ // EXPECT_FP_EQ(100000000.0, LIBC_NAMESPACE::pow(10.0, 8.0));
+ // EXPECT_FP_EQ(1000000000.0, LIBC_NAMESPACE::pow(10.0, 9.0));
+ // EXPECT_FP_EQ(10000000000.0, LIBC_NAMESPACE::pow(10.0, 10.0));
+
+ // Overflow / Underflow:
+ if (ROUNDING_MODES[i] != RoundingMode::Downward &&
+ ROUNDING_MODES[i] != RoundingMode::TowardZero) {
+ EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::pow(3.1, 2001.0),
+ FE_OVERFLOW);
+ }
+ if (ROUNDING_MODES[i] != RoundingMode::Upward) {
+ EXPECT_FP_EQ_WITH_EXCEPTION(0.0, LIBC_NAMESPACE::pow(3.1, -2001.0),
+ FE_UNDERFLOW);
+ }
+ }
+}
>From 31ca1f5a83fa709a451bff0d03a27e2a0f55e97e Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Mon, 5 Aug 2024 13:35:13 +0000
Subject: [PATCH 2/3] Address comments.
---
libc/src/math/generic/CMakeLists.txt | 5 +-
libc/src/math/generic/pow.cpp | 21 ++++--
libc/test/src/math/pow_test.cpp | 6 +-
libc/test/src/math/smoke/CMakeLists.txt | 1 +
libc/test/src/math/smoke/pow_test.cpp | 99 ++++++++++++-------------
5 files changed, 70 insertions(+), 62 deletions(-)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 8d59f5de9c310..2fe6cc4d39d94 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1560,12 +1560,15 @@ add_entrypoint_object(
../pow.h
DEPENDS
.common_constants
+ libc.hdr.errno_macros
+ libc.hdr.fenv_macros
+ libc.src.__support.CPP.bit
+ libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.rounding_mode
libc.src.__support.FPUtil.sqrt
libc.src.__support.macros.optimization
COMPILE_OPTIONS
diff --git a/libc/src/math/generic/pow.cpp b/libc/src/math/generic/pow.cpp
index 49f3ca602c207..c7dc5135d76d4 100644
--- a/libc/src/math/generic/pow.cpp
+++ b/libc/src/math/generic/pow.cpp
@@ -8,12 +8,15 @@
#include "src/math/pow.h"
#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/FPUtil/sqrt.h" // Speedup for pow(x, 1/2) = sqrt(x)
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
@@ -240,20 +243,23 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
if (y_a > 0x43d7'4910'd52d'3052) {
if (y_a >= 0x7ff0'0000'0000'0000) {
// y is inf or nan
- if (y_mant != 0)
+ if (y_mant != 0) {
// y is NaN
// pow(1, NaN) = 1
// pow(x, NaN) = NaN
return (x_u == FPBits::one().uintval()) ? 1.0 : y;
+ }
// Now y is +-Inf
- if (x_abs.is_nan())
+ if (x_abs.is_nan()) {
// pow(NaN, +-Inf) = NaN
return x;
+ }
- if (x_a == 0x3ff0'0000'0000'0000)
+ if (x_a == 0x3ff0'0000'0000'0000) {
// pow(+-1, +-Inf) = 1.0
return 1.0;
+ }
if (x_a == 0 && y_sign) {
// pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
@@ -277,14 +283,15 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
// y is finite and non-zero.
- if (x_u == FPBits::one().uintval())
+ if (x_u == FPBits::one().uintval()) {
// pow(1, y) = 1
return 1.0;
+ }
// TODO: Speed things up with pow(2, y) = exp2(y) and pow(10, y) = exp10(y).
if (x_a == 0) {
- const bool out_is_neg = x_sign && is_odd_integer(y);
+ bool out_is_neg = x_sign && is_odd_integer(y);
if (y_sign) {
// pow(0, negative number) = inf
fputil::set_errno_if_required(EDOM);
@@ -296,7 +303,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
}
if (x_a == FPBits::inf().uintval()) {
- const bool out_is_neg = x_sign && is_odd_integer(y);
+ bool out_is_neg = x_sign && is_odd_integer(y);
if (y_sign)
return out_is_neg ? -0.0f : 0.0f;
return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val();
diff --git a/libc/test/src/math/pow_test.cpp b/libc/test/src/math/pow_test.cpp
index 52416e9610c3e..468a6bbee4b2c 100644
--- a/libc/test/src/math/pow_test.cpp
+++ b/libc/test/src/math/pow_test.cpp
@@ -1,4 +1,4 @@
-//===-- Unittests for pow ------------------------------------------------===//
+//===-- Unittests for pow -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include "hdr/math_macros.h"
-#include "src/__support/FPUtil/FPBits.h"
#include "src/math/pow.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
@@ -51,7 +49,7 @@ TEST_F(LlvmLibcPowTest, InFloatRange) {
uint64_t fails = 0;
uint64_t count = 0;
uint64_t cc = 0;
- double mx, my, mr = 0.0;
+ double mx = 0.0, my = 0.0, mr = 0.0;
double tol = 1.5;
for (uint64_t i = 0, v = X_START; i <= X_COUNT; ++i, v += X_STEP) {
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index f675b3c7814df..ebd9ca02d08e8 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -3702,6 +3702,7 @@ add_fp_unittest(
SRCS
pow_test.cpp
DEPENDS
+ libc.hdr.fenv_macros
libc.src.math.pow
)
diff --git a/libc/test/src/math/smoke/pow_test.cpp b/libc/test/src/math/smoke/pow_test.cpp
index 18ad88c0d893c..4f2a3a28c0dcb 100644
--- a/libc/test/src/math/smoke/pow_test.cpp
+++ b/libc/test/src/math/smoke/pow_test.cpp
@@ -6,8 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "hdr/math_macros.h"
-#include "src/__support/FPUtil/FPBits.h"
+#include "hdr/fenv_macros.h"
#include "src/math/pow.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
@@ -17,12 +16,12 @@ using LIBC_NAMESPACE::fputil::testing::ForceRoundingMode;
using LIBC_NAMESPACE::fputil::testing::RoundingMode;
TEST_F(LlvmLibcPowTest, SpecialNumbers) {
- constexpr double neg_odd_integer = -3.0;
- constexpr double neg_even_integer = -6.0;
- constexpr double neg_non_integer = -1.1;
- constexpr double pos_odd_integer = 5.0;
- constexpr double pos_even_integer = 8.0;
- constexpr double pos_non_integer = 1.1;
+ constexpr double NEG_ODD_INTEGER = -3.0;
+ constexpr double NEG_EVEN_INTEGER = -6.0;
+ constexpr double NEG_NON_INTEGER = -1.1;
+ constexpr double POS_ODD_INTEGER = 5.0;
+ constexpr double POS_EVEN_INTEGER = 8.0;
+ constexpr double POS_NON_INTEGER = 1.1;
for (int i = 0; i < N_ROUNDING_MODES; ++i) {
ForceRoundingMode __r(ROUNDING_MODES[i]);
@@ -30,15 +29,15 @@ TEST_F(LlvmLibcPowTest, SpecialNumbers) {
continue;
// pow( 0.0, exponent )
- EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::pow(zero, neg_odd_integer),
+ EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::pow(zero, NEG_ODD_INTEGER),
FE_DIVBYZERO);
EXPECT_FP_EQ_WITH_EXCEPTION(
- inf, LIBC_NAMESPACE::pow(zero, neg_even_integer), FE_DIVBYZERO);
- EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::pow(zero, neg_non_integer),
+ inf, LIBC_NAMESPACE::pow(zero, NEG_EVEN_INTEGER), FE_DIVBYZERO);
+ EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::pow(zero, NEG_NON_INTEGER),
FE_DIVBYZERO);
- EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(zero, pos_odd_integer));
- EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(zero, pos_even_integer));
- EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(zero, pos_non_integer));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(zero, POS_ODD_INTEGER));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(zero, POS_EVEN_INTEGER));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(zero, POS_NON_INTEGER));
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(zero, zero));
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(zero, neg_zero));
EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::pow(zero, inf));
@@ -48,14 +47,14 @@ TEST_F(LlvmLibcPowTest, SpecialNumbers) {
// pow( -0.0, exponent )
EXPECT_FP_EQ_WITH_EXCEPTION(
- neg_inf, LIBC_NAMESPACE::pow(neg_zero, neg_odd_integer), FE_DIVBYZERO);
+ neg_inf, LIBC_NAMESPACE::pow(neg_zero, NEG_ODD_INTEGER), FE_DIVBYZERO);
EXPECT_FP_EQ_WITH_EXCEPTION(
- inf, LIBC_NAMESPACE::pow(neg_zero, neg_even_integer), FE_DIVBYZERO);
+ inf, LIBC_NAMESPACE::pow(neg_zero, NEG_EVEN_INTEGER), FE_DIVBYZERO);
EXPECT_FP_EQ_WITH_EXCEPTION(
- inf, LIBC_NAMESPACE::pow(neg_zero, neg_non_integer), FE_DIVBYZERO);
- EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::pow(neg_zero, pos_odd_integer));
- EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_zero, pos_even_integer));
- EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_zero, pos_non_integer));
+ inf, LIBC_NAMESPACE::pow(neg_zero, NEG_NON_INTEGER), FE_DIVBYZERO);
+ EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::pow(neg_zero, POS_ODD_INTEGER));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_zero, POS_EVEN_INTEGER));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_zero, POS_NON_INTEGER));
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(neg_zero, zero));
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(neg_zero, neg_zero));
EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::pow(neg_zero, inf));
@@ -68,12 +67,12 @@ TEST_F(LlvmLibcPowTest, SpecialNumbers) {
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, neg_zero));
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, 1.0));
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, -1.0));
- EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, neg_odd_integer));
- EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, neg_even_integer));
- EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, neg_non_integer));
- EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, pos_odd_integer));
- EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, pos_even_integer));
- EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, pos_non_integer));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, NEG_ODD_INTEGER));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, NEG_EVEN_INTEGER));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, NEG_NON_INTEGER));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, POS_ODD_INTEGER));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, POS_EVEN_INTEGER));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, POS_NON_INTEGER));
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, inf));
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, neg_inf));
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(1.0, aNaN));
@@ -83,13 +82,13 @@ TEST_F(LlvmLibcPowTest, SpecialNumbers) {
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, neg_zero));
EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::pow(-1.0, 1.0));
EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::pow(-1.0, -1.0));
- EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::pow(-1.0, neg_odd_integer));
- EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, neg_even_integer));
- EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::pow(-1.0, neg_non_integer),
+ EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::pow(-1.0, NEG_ODD_INTEGER));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, NEG_EVEN_INTEGER));
+ EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::pow(-1.0, NEG_NON_INTEGER),
FE_INVALID);
- EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::pow(-1.0, pos_odd_integer));
- EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, pos_even_integer));
- EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::pow(-1.0, pos_non_integer),
+ EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::pow(-1.0, POS_ODD_INTEGER));
+ EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, POS_EVEN_INTEGER));
+ EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::pow(-1.0, POS_NON_INTEGER),
FE_INVALID);
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, inf));
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(-1.0, neg_inf));
@@ -100,12 +99,12 @@ TEST_F(LlvmLibcPowTest, SpecialNumbers) {
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(inf, neg_zero));
EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, 1.0));
EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, -1.0));
- EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, neg_odd_integer));
- EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, neg_even_integer));
- EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, neg_non_integer));
- EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, pos_odd_integer));
- EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, pos_even_integer));
- EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, pos_non_integer));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, NEG_ODD_INTEGER));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, NEG_EVEN_INTEGER));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, NEG_NON_INTEGER));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, POS_ODD_INTEGER));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, POS_EVEN_INTEGER));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, POS_NON_INTEGER));
EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(inf, inf));
EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(inf, neg_inf));
EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(inf, aNaN));
@@ -115,12 +114,12 @@ TEST_F(LlvmLibcPowTest, SpecialNumbers) {
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(neg_inf, neg_zero));
EXPECT_FP_EQ(neg_inf, LIBC_NAMESPACE::pow(neg_inf, 1.0));
EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::pow(neg_inf, -1.0));
- EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::pow(neg_inf, neg_odd_integer));
- EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_inf, neg_even_integer));
- EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_inf, neg_non_integer));
- EXPECT_FP_EQ(neg_inf, LIBC_NAMESPACE::pow(neg_inf, pos_odd_integer));
- EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(neg_inf, pos_even_integer));
- EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(neg_inf, pos_non_integer));
+ EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::pow(neg_inf, NEG_ODD_INTEGER));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_inf, NEG_EVEN_INTEGER));
+ EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_inf, NEG_NON_INTEGER));
+ EXPECT_FP_EQ(neg_inf, LIBC_NAMESPACE::pow(neg_inf, POS_ODD_INTEGER));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(neg_inf, POS_EVEN_INTEGER));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(neg_inf, POS_NON_INTEGER));
EXPECT_FP_EQ(inf, LIBC_NAMESPACE::pow(neg_inf, inf));
EXPECT_FP_EQ(zero, LIBC_NAMESPACE::pow(neg_inf, neg_inf));
EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(neg_inf, aNaN));
@@ -130,12 +129,12 @@ TEST_F(LlvmLibcPowTest, SpecialNumbers) {
EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(aNaN, neg_zero));
EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, 1.0));
EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, -1.0));
- EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, neg_odd_integer));
- EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, neg_even_integer));
- EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, neg_non_integer));
- EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, pos_odd_integer));
- EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, pos_even_integer));
- EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, pos_non_integer));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, NEG_ODD_INTEGER));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, NEG_EVEN_INTEGER));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, NEG_NON_INTEGER));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, POS_ODD_INTEGER));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, POS_EVEN_INTEGER));
+ EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, POS_NON_INTEGER));
EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, inf));
EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, neg_inf));
EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(aNaN, aNaN));
>From 530b04d8ff3f376365ac3ceccedad5e9de28c932 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Mon, 5 Aug 2024 15:13:27 +0000
Subject: [PATCH 3/3] Update comments and hex integer style.
---
libc/src/math/generic/pow.cpp | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/libc/src/math/generic/pow.cpp b/libc/src/math/generic/pow.cpp
index c7dc5135d76d4..8242a62ba1c31 100644
--- a/libc/src/math/generic/pow.cpp
+++ b/libc/src/math/generic/pow.cpp
@@ -223,7 +223,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
// So if |y| > 1075 / log2(1 - 2^-53), and x is finite:
// |y * log2(x)| = 0 or > 1075.
// Hence x^y will either overflow or underflow if x is not zero.
- if (LIBC_UNLIKELY(y_mant == 0 || y_a > 0x43d74910d52d3052 ||
+ if (LIBC_UNLIKELY(y_mant == 0 || y_a > 0x43d7'4910'd52d'3052 ||
x_u == FPBits::one().uintval() ||
x_u >= FPBits::inf().uintval() ||
x_u < FPBits::min_normal().uintval())) {
@@ -240,6 +240,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
return y_sign ? (1.0 / (x * x)) : (x * x);
}
+ // |y| > 1075 / log2(1 - 2^-53).
if (y_a > 0x43d7'4910'd52d'3052) {
if (y_a >= 0x7ff0'0000'0000'0000) {
// y is inf or nan
@@ -268,16 +269,16 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
return FPBits::inf().get_val();
}
// pow (|x| < 1, -inf) = +inf
- // pow (|x| < 1, +inf) = 0.0f
- // pow (|x| > 1, -inf) = 0.0f
+ // pow (|x| < 1, +inf) = 0.0
+ // pow (|x| > 1, -inf) = 0.0
// pow (|x| > 1, +inf) = +inf
return ((x_a < FPBits::one().uintval()) == y_sign)
? FPBits::inf().get_val()
: 0.0;
}
- // x^y will be overflow / underflow in double precision. Set y to a
+ // x^y will overflow / underflow in double precision. Set y to a
// large enough exponent but not too large, so that the computations
- // won't be overflow in double precision.
+ // won't overflow in double precision.
y = y_sign ? -0x1.0p100 : 0x1.0p100;
}
@@ -305,7 +306,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
if (x_a == FPBits::inf().uintval()) {
bool out_is_neg = x_sign && is_odd_integer(y);
if (y_sign)
- return out_is_neg ? -0.0f : 0.0f;
+ return out_is_neg ? -0.0 : 0.0;
return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val();
}
@@ -327,7 +328,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
x = -x;
if (is_odd_integer(y))
// sign = -1.0;
- sign = 0x8000'0000'0000'0000ULL;
+ sign = 0x8000'0000'0000'0000;
} else {
// pow( negative, non-integer ) = NaN
fputil::set_errno_if_required(EDOM);
@@ -349,7 +350,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
unsigned idx_x = static_cast<unsigned>(x_mant >> (FPBits::FRACTION_LEN - 7));
// Add the hidden bit to the mantissa.
// 1 <= m_x < 2
- FPBits m_x = FPBits(x_mant | 0x3FF0'0000'0000'0000);
+ FPBits m_x = FPBits(x_mant | 0x3ff0'0000'0000'0000);
// Reduced argument for log2(m_x):
// dx = r * m_x - 1.
@@ -362,7 +363,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
#ifdef LIBC_TARGET_CPU_HAS_FMA
dx = fputil::multiply_add(RD[idx_x], m_x.get_val(), -1.0); // Exact
#else
- double c = FPBits(m_x.uintval() & 0x3FFF'E000'0000'0000).get_val();
+ double c = FPBits(m_x.uintval() & 0x3fff'e000'0000'0000).get_val();
dx = fputil::multiply_add(RD[idx_x], m_x.get_val() - c, CD[idx_x]); // Exact
#endif // LIBC_TARGET_CPU_HAS_FMA
More information about the libc-commits
mailing list