[libcxx-commits] [libcxx] [libc++] Implement std::gcd using the binary version (PR #77747)
via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Apr 29 05:26:36 PDT 2024
https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/77747
>From 0c29865749256a10eaf787fbaa116e8c24fc87ca Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Fri, 12 Jan 2024 08:01:15 +0100
Subject: [PATCH 1/2] [libc++] Fix ubsan error in __ct_abs
UBsan reports
runtime error: negation of -2147483648 cannot be represented in type
'int'; cast to an unsigned type to negate this value to itself
Fix this by performing the suggested casts.
---
libcxx/include/__numeric/gcd_lcm.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/include/__numeric/gcd_lcm.h b/libcxx/include/__numeric/gcd_lcm.h
index 48df2338051e29..37930d29f35a1e 100644
--- a/libcxx/include/__numeric/gcd_lcm.h
+++ b/libcxx/include/__numeric/gcd_lcm.h
@@ -39,7 +39,7 @@ struct __ct_abs<_Result, _Source, true> {
if (__t >= 0)
return __t;
if (__t == numeric_limits<_Source>::min())
- return -static_cast<_Result>(__t);
+ return static_cast<_Result>(-static_cast<std::make_unsigned_t<_Result>>(__t));
return -__t;
}
};
>From 59bbb5361d31b6bfafe03c483d55ca15191ec4fb Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Mon, 13 Mar 2023 21:06:01 +0100
Subject: [PATCH 2/2] [libc++] Implement std::gcd using the binary version
The binary version is four times faster than current implementation
in my setup, and generally considered a better implementation.
Code inspired by https://en.algorithmica.org/hpc/algorithms/gcd/
which itself is inspired by https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/
Hybrid approach and benchmarks inspired by `ylchapuy <https://github.com/ylchapuy>`.
Fix #77648
---
libcxx/benchmarks/CMakeLists.txt | 3 +-
libcxx/benchmarks/numeric/gcd.bench.cpp | 53 ++++++++++++
libcxx/include/__numeric/gcd_lcm.h | 44 +++++++++-
.../test/libcxx/transitive_includes/cxx03.csv | 1 +
.../test/libcxx/transitive_includes/cxx11.csv | 1 +
.../test/libcxx/transitive_includes/cxx14.csv | 1 +
.../test/libcxx/transitive_includes/cxx17.csv | 1 +
.../test/libcxx/transitive_includes/cxx20.csv | 1 +
.../test/libcxx/transitive_includes/cxx26.csv | 23 +++++
.../numeric.ops/numeric.ops.gcd/gcd.pass.cpp | 83 ++++++++++++++++++-
10 files changed, 207 insertions(+), 4 deletions(-)
create mode 100644 libcxx/benchmarks/numeric/gcd.bench.cpp
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 5dc3be0c367e5e..93b549a316e385 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -122,7 +122,7 @@ endif()
add_library( cxx-benchmarks-flags-libcxx INTERFACE)
target_link_libraries( cxx-benchmarks-flags-libcxx INTERFACE cxx-benchmarks-flags)
target_compile_options(cxx-benchmarks-flags-libcxx INTERFACE ${SANITIZER_FLAGS} -Wno-user-defined-literals -Wno-suggest-override)
-target_link_options( cxx-benchmarks-flags-libcxx INTERFACE -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS})
+target_link_options( cxx-benchmarks-flags-libcxx INTERFACE -lm -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS})
set(libcxx_benchmark_targets)
@@ -220,6 +220,7 @@ set(BENCHMARK_TESTS
lexicographical_compare_three_way.bench.cpp
map.bench.cpp
monotonic_buffer.bench.cpp
+ numeric/gcd.bench.cpp
ordered_set.bench.cpp
shared_mutex_vs_mutex.bench.cpp
stop_token.bench.cpp
diff --git a/libcxx/benchmarks/numeric/gcd.bench.cpp b/libcxx/benchmarks/numeric/gcd.bench.cpp
new file mode 100644
index 00000000000000..d52ed873be0f1c
--- /dev/null
+++ b/libcxx/benchmarks/numeric/gcd.bench.cpp
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <array>
+#include <benchmark/benchmark.h>
+#include <cstring>
+#include <numeric>
+#include <random>
+
+template <class T>
+static std::array<T, 1000> generate(std::uniform_int_distribution<T> distribution = std::uniform_int_distribution<T>{
+ std::numeric_limits<T>::min(), std::numeric_limits<T>::max()}) {
+ std::mt19937 generator;
+ std::array<T, 1000> result;
+ std::generate_n(result.begin(), result.size(), [&] { return distribution(generator); });
+ return result;
+}
+
+static void bm_gcd_random(benchmark::State& state) {
+ std::array data = generate<int>();
+ while (state.KeepRunningBatch(data.size()))
+ for (auto v0 : data)
+ for (auto v1 : data)
+ benchmark::DoNotOptimize(std::gcd(v0, v1));
+}
+BENCHMARK(bm_gcd_random);
+
+static void bm_gcd_trivial(benchmark::State& state) {
+ int lhs = ~static_cast<T>(0), rhs = 1;
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(lhs);
+ benchmark::DoNotOptimize(rhs);
+ benchmark::DoNotOptimize(std::gcd(lhs, rhs));
+ }
+}
+BENCHMARK(bm_gcd_trivial);
+
+static void bm_gcd_complex(benchmark::State& state) {
+ int lhs = 2971215073, rhs = 1836311903;
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(lhs);
+ benchmark::DoNotOptimize(rhs);
+ benchmark::DoNotOptimize(std::gcd(lhs, rhs));
+ }
+}
+BENCHMARK(bm_gcd_complex);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__numeric/gcd_lcm.h b/libcxx/include/__numeric/gcd_lcm.h
index 37930d29f35a1e..9b53d0799cf21d 100644
--- a/libcxx/include/__numeric/gcd_lcm.h
+++ b/libcxx/include/__numeric/gcd_lcm.h
@@ -10,7 +10,9 @@
#ifndef _LIBCPP___NUMERIC_GCD_LCM_H
#define _LIBCPP___NUMERIC_GCD_LCM_H
+#include <__algorithm/min.h>
#include <__assert>
+#include <__bit/countr.h>
#include <__config>
#include <__type_traits/common_type.h>
#include <__type_traits/is_integral.h>
@@ -50,9 +52,47 @@ struct __ct_abs<_Result, _Source, false> {
};
template <class _Tp>
-_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __m, _Tp __n) {
+_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
static_assert((!is_signed<_Tp>::value), "");
- return __n == 0 ? __m : std::__gcd<_Tp>(__n, __m % __n);
+
+ // From: https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor
+ //
+ // If power of two divides both numbers, we can push it out.
+ // - gcd( 2^x * a, 2^x * b) = 2^x * gcd(a, b)
+ //
+ // If and only if exactly one number is even, we can divide that number by that power.
+ // - if a, b are odd, then gcd(2^x * a, b) = gcd(a, b)
+ //
+ // And standard gcd algorithm where instead of modulo, minus is used.
+
+ if (__a < __b) {
+ _Tp __tmp = __b;
+ __b = __a;
+ __a = __tmp;
+ }
+ if (__b == 0)
+ return __a;
+ __a %= __b; // Make both argument of the same size, and early result in the easy case.
+ if (__a == 0)
+ return __b;
+
+ int __az = std::__countr_zero(__a);
+ int __bz = std::__countr_zero(__b);
+ int __shift = std::min(__az, __bz);
+ __a >>= __az;
+ __b >>= __bz;
+ do {
+ _Tp __diff = __a - __b;
+ if (__a > __b) {
+ __a = __b;
+ __b = __diff;
+ } else {
+ __b = __b - __a;
+ }
+ if (__diff != 0)
+ __b >>= std::__countr_zero(__diff);
+ } while (__b != 0);
+ return __a << __shift;
}
template <class _Tp, class _Up>
diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv
index c2250899a8002b..00a985c399af9f 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx03.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv
@@ -558,6 +558,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
+numeric initializer_list
numeric iterator
numeric limits
numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv
index 3e929e8f940967..692e11643220d2 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx11.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv
@@ -563,6 +563,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
+numeric initializer_list
numeric iterator
numeric limits
numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv
index 422db19b6bb8ac..75ae33775971ab 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx14.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv
@@ -566,6 +566,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
+numeric initializer_list
numeric iterator
numeric limits
numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index 422db19b6bb8ac..75ae33775971ab 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -566,6 +566,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
+numeric initializer_list
numeric iterator
numeric limits
numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv
index 6b80790a9d19b5..c517992ae1cbf9 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx20.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv
@@ -576,6 +576,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
+numeric initializer_list
numeric iterator
numeric limits
numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index ea01e413458500..fc007aa60a5a90 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -176,6 +176,29 @@ experimental/simd limits
experimental/type_traits initializer_list
experimental/type_traits type_traits
experimental/utility utility
+experimental/vector experimental/memory_resource
+experimental/vector vector
+ext/hash_map algorithm
+ext/hash_map cmath
+ext/hash_map cstddef
+ext/hash_map cstdint
+ext/hash_map cstring
+ext/hash_map functional
+ext/hash_map initializer_list
+ext/hash_map limits
+ext/hash_map new
+ext/hash_map stdexcept
+ext/hash_map string
+ext/hash_set algorithm
+ext/hash_set cmath
+ext/hash_set cstddef
+ext/hash_set cstdint
+ext/hash_set cstring
+ext/hash_set functional
+ext/hash_set initializer_list
+ext/hash_set limits
+ext/hash_set new
+ext/hash_set string
filesystem compare
filesystem cstddef
filesystem cstdint
diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
index 831c226f9c8ea1..555b4a05ba8a1b 100644
--- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
@@ -17,6 +17,7 @@
#include <cassert>
#include <climits>
#include <cstdint>
+#include <random>
#include <type_traits>
#include "test_macros.h"
@@ -48,6 +49,68 @@ constexpr bool test0(int in1, int in2, int out)
return true;
}
+template <typename T>
+T basic_gcd_(T m, T n) {
+ return n == 0 ? m : basic_gcd_<T>(n, m % n);
+}
+
+template <typename T>
+T basic_gcd(T m, T n) {
+ using Tp = std::make_unsigned_t<T>;
+ if (m < 0 && m != std::numeric_limits<T>::min())
+ m = -m;
+ if (n < 0 && n != std::numeric_limits<T>::min())
+ n = -n;
+ return basic_gcd_(static_cast<Tp>(m), static_cast<Tp>(n));
+}
+
+template <typename Input>
+void do_fuzzy_tests() {
+ std::mt19937 gen(1938);
+ std::uniform_int_distribution<Input> distrib;
+
+ constexpr int nb_rounds = 10000;
+ for (int i = 0; i < nb_rounds; ++i) {
+ Input n = distrib(gen);
+ Input m = distrib(gen);
+ assert(std::gcd(n, m) == basic_gcd(n, m));
+ }
+}
+
+template <typename Input>
+void do_limit_tests() {
+ Input inputs[] = {
+ std::numeric_limits<Input>::min(),
+ std::numeric_limits<Input>::max(),
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ (Input)-1,
+ (Input)-2,
+ (Input)-3,
+ (Input)-4,
+ (Input)-5,
+ (Input)-6,
+ (Input)-7,
+ (Input)-8,
+ (Input)-9,
+ (Input)-10,
+ };
+
+ for (auto n : inputs) {
+ for (auto m : inputs) {
+ assert(std::gcd(n, m) == basic_gcd(n, m));
+ }
+ }
+}
template <typename Input1, typename Input2 = Input1>
constexpr bool do_test(int = 0)
@@ -143,5 +206,23 @@ int main(int argc, char**)
assert(res == 2);
}
- return 0;
+ do_fuzzy_tests<std::int8_t>();
+ do_fuzzy_tests<std::int16_t>();
+ do_fuzzy_tests<std::int32_t>();
+ do_fuzzy_tests<std::int64_t>();
+ do_fuzzy_tests<std::uint8_t>();
+ do_fuzzy_tests<std::uint16_t>();
+ do_fuzzy_tests<std::uint32_t>();
+ do_fuzzy_tests<std::uint64_t>();
+
+ do_limit_tests<std::int8_t>();
+ do_limit_tests<std::int16_t>();
+ do_limit_tests<std::int32_t>();
+ do_limit_tests<std::int64_t>();
+ do_limit_tests<std::uint8_t>();
+ do_limit_tests<std::uint16_t>();
+ do_limit_tests<std::uint32_t>();
+ do_limit_tests<std::uint64_t>();
+
+ return 0;
}
More information about the libcxx-commits
mailing list