[libcxx-commits] [libcxx] [libc++] Implement std::gcd using the binary version (PR #77747)
via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Apr 28 09:32:00 PDT 2024
https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/77747
>From 0c29865749256a10eaf787fbaa116e8c24fc87ca Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Fri, 12 Jan 2024 08:01:15 +0100
Subject: [PATCH 1/5] [libc++] Fix ubsan error in __ct_abs
UBsan reports
runtime error: negation of -2147483648 cannot be represented in type
'int'; cast to an unsigned type to negate this value to itself
Fix this by performing the suggested casts.
---
libcxx/include/__numeric/gcd_lcm.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/include/__numeric/gcd_lcm.h b/libcxx/include/__numeric/gcd_lcm.h
index 48df2338051e29..37930d29f35a1e 100644
--- a/libcxx/include/__numeric/gcd_lcm.h
+++ b/libcxx/include/__numeric/gcd_lcm.h
@@ -39,7 +39,7 @@ struct __ct_abs<_Result, _Source, true> {
if (__t >= 0)
return __t;
if (__t == numeric_limits<_Source>::min())
- return -static_cast<_Result>(__t);
+ return static_cast<_Result>(-static_cast<std::make_unsigned_t<_Result>>(__t));
return -__t;
}
};
>From 3543ddc6b7fc0384cba035a92b76e1208cd4e815 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Mon, 13 Mar 2023 21:06:01 +0100
Subject: [PATCH 2/5] [libc++] Implement std::gcd using the binary version
The binary version is four times faster than current implementation
in my setup, and generally considered a better implementation.
Code inspired by https://en.algorithmica.org/hpc/algorithms/gcd/
which itself is inspired by https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/
Fix #77648
---
libcxx/include/__bit/countr.h | 4 +
libcxx/include/__numeric/gcd_lcm.h | 22 ++++-
.../test/libcxx/transitive_includes/cxx03.csv | 1 +
.../test/libcxx/transitive_includes/cxx11.csv | 1 +
.../test/libcxx/transitive_includes/cxx14.csv | 1 +
.../test/libcxx/transitive_includes/cxx17.csv | 1 +
.../test/libcxx/transitive_includes/cxx20.csv | 1 +
.../test/libcxx/transitive_includes/cxx26.csv | 23 +++++
.../numeric.ops/numeric.ops.gcd/gcd.pass.cpp | 83 ++++++++++++++++++-
9 files changed, 134 insertions(+), 3 deletions(-)
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index 9e92021fba3551..f58ee5ec582848 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -15,6 +15,7 @@
#include <__bit/rotate.h>
#include <__concepts/arithmetic.h>
#include <__config>
+#include <__type_traits/is_unsigned_integer.h>
#include <limits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -38,6 +39,8 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct
return __builtin_ctzll(__x);
}
+#if _LIBCPP_STD_VER >= 17
+
template <class _Tp>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT {
#if __has_builtin(__builtin_ctzg)
@@ -62,6 +65,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
}
#endif // __has_builtin(__builtin_ctzg)
}
+#endif
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__numeric/gcd_lcm.h b/libcxx/include/__numeric/gcd_lcm.h
index 37930d29f35a1e..89d47c1241d5a4 100644
--- a/libcxx/include/__numeric/gcd_lcm.h
+++ b/libcxx/include/__numeric/gcd_lcm.h
@@ -10,7 +10,9 @@
#ifndef _LIBCPP___NUMERIC_GCD_LCM_H
#define _LIBCPP___NUMERIC_GCD_LCM_H
+#include <__algorithm/min.h>
#include <__assert>
+#include <__bit/countr.h>
#include <__config>
#include <__type_traits/common_type.h>
#include <__type_traits/is_integral.h>
@@ -50,9 +52,25 @@ struct __ct_abs<_Result, _Source, false> {
};
template <class _Tp>
-_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __m, _Tp __n) {
+_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
static_assert((!is_signed<_Tp>::value), "");
- return __n == 0 ? __m : std::__gcd<_Tp>(__n, __m % __n);
+ if (__a == 0)
+ return __b;
+ if (__b == 0)
+ return __a;
+
+ int __az = std::__countr_zero(__a);
+ int __bz = std::__countr_zero(__b);
+ int __shift = std::min(__az, __bz);
+ __b >>= __bz;
+ while (__a != 0) {
+ __a >>= __az;
+ _Tp __absdiff = __a > __b ? __a - __b : __b - __a;
+ __b = std::min(__a, __b);
+ __a = __absdiff;
+ __az = std::__countr_zero(__absdiff);
+ }
+ return __b << __shift;
}
template <class _Tp, class _Up>
diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv
index c2250899a8002b..00a985c399af9f 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx03.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv
@@ -558,6 +558,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
+numeric initializer_list
numeric iterator
numeric limits
numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv
index 3e929e8f940967..692e11643220d2 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx11.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv
@@ -563,6 +563,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
+numeric initializer_list
numeric iterator
numeric limits
numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv
index 422db19b6bb8ac..75ae33775971ab 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx14.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv
@@ -566,6 +566,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
+numeric initializer_list
numeric iterator
numeric limits
numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index 422db19b6bb8ac..75ae33775971ab 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -566,6 +566,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
+numeric initializer_list
numeric iterator
numeric limits
numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv
index 6b80790a9d19b5..c517992ae1cbf9 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx20.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv
@@ -576,6 +576,7 @@ numeric cstddef
numeric cstdint
numeric execution
numeric functional
+numeric initializer_list
numeric iterator
numeric limits
numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index ea01e413458500..fc007aa60a5a90 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -176,6 +176,29 @@ experimental/simd limits
experimental/type_traits initializer_list
experimental/type_traits type_traits
experimental/utility utility
+experimental/vector experimental/memory_resource
+experimental/vector vector
+ext/hash_map algorithm
+ext/hash_map cmath
+ext/hash_map cstddef
+ext/hash_map cstdint
+ext/hash_map cstring
+ext/hash_map functional
+ext/hash_map initializer_list
+ext/hash_map limits
+ext/hash_map new
+ext/hash_map stdexcept
+ext/hash_map string
+ext/hash_set algorithm
+ext/hash_set cmath
+ext/hash_set cstddef
+ext/hash_set cstdint
+ext/hash_set cstring
+ext/hash_set functional
+ext/hash_set initializer_list
+ext/hash_set limits
+ext/hash_set new
+ext/hash_set string
filesystem compare
filesystem cstddef
filesystem cstdint
diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
index 831c226f9c8ea1..555b4a05ba8a1b 100644
--- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
@@ -17,6 +17,7 @@
#include <cassert>
#include <climits>
#include <cstdint>
+#include <random>
#include <type_traits>
#include "test_macros.h"
@@ -48,6 +49,68 @@ constexpr bool test0(int in1, int in2, int out)
return true;
}
+template <typename T>
+T basic_gcd_(T m, T n) {
+ return n == 0 ? m : basic_gcd_<T>(n, m % n);
+}
+
+template <typename T>
+T basic_gcd(T m, T n) {
+ using Tp = std::make_unsigned_t<T>;
+ if (m < 0 && m != std::numeric_limits<T>::min())
+ m = -m;
+ if (n < 0 && n != std::numeric_limits<T>::min())
+ n = -n;
+ return basic_gcd_(static_cast<Tp>(m), static_cast<Tp>(n));
+}
+
+template <typename Input>
+void do_fuzzy_tests() {
+ std::mt19937 gen(1938);
+ std::uniform_int_distribution<Input> distrib;
+
+ constexpr int nb_rounds = 10000;
+ for (int i = 0; i < nb_rounds; ++i) {
+ Input n = distrib(gen);
+ Input m = distrib(gen);
+ assert(std::gcd(n, m) == basic_gcd(n, m));
+ }
+}
+
+template <typename Input>
+void do_limit_tests() {
+ Input inputs[] = {
+ std::numeric_limits<Input>::min(),
+ std::numeric_limits<Input>::max(),
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ (Input)-1,
+ (Input)-2,
+ (Input)-3,
+ (Input)-4,
+ (Input)-5,
+ (Input)-6,
+ (Input)-7,
+ (Input)-8,
+ (Input)-9,
+ (Input)-10,
+ };
+
+ for (auto n : inputs) {
+ for (auto m : inputs) {
+ assert(std::gcd(n, m) == basic_gcd(n, m));
+ }
+ }
+}
template <typename Input1, typename Input2 = Input1>
constexpr bool do_test(int = 0)
@@ -143,5 +206,23 @@ int main(int argc, char**)
assert(res == 2);
}
- return 0;
+ do_fuzzy_tests<std::int8_t>();
+ do_fuzzy_tests<std::int16_t>();
+ do_fuzzy_tests<std::int32_t>();
+ do_fuzzy_tests<std::int64_t>();
+ do_fuzzy_tests<std::uint8_t>();
+ do_fuzzy_tests<std::uint16_t>();
+ do_fuzzy_tests<std::uint32_t>();
+ do_fuzzy_tests<std::uint64_t>();
+
+ do_limit_tests<std::int8_t>();
+ do_limit_tests<std::int16_t>();
+ do_limit_tests<std::int32_t>();
+ do_limit_tests<std::int64_t>();
+ do_limit_tests<std::uint8_t>();
+ do_limit_tests<std::uint16_t>();
+ do_limit_tests<std::uint32_t>();
+ do_limit_tests<std::uint64_t>();
+
+ return 0;
}
>From b36e5202cc8bfd1daf36e6dd036743c66220597d Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Fri, 12 Jan 2024 17:00:41 +0100
Subject: [PATCH 3/5] WIP:benchmark
---
libcxx/benchmarks/CMakeLists.txt | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 5dc3be0c367e5e..93b549a316e385 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -122,7 +122,7 @@ endif()
add_library( cxx-benchmarks-flags-libcxx INTERFACE)
target_link_libraries( cxx-benchmarks-flags-libcxx INTERFACE cxx-benchmarks-flags)
target_compile_options(cxx-benchmarks-flags-libcxx INTERFACE ${SANITIZER_FLAGS} -Wno-user-defined-literals -Wno-suggest-override)
-target_link_options( cxx-benchmarks-flags-libcxx INTERFACE -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS})
+target_link_options( cxx-benchmarks-flags-libcxx INTERFACE -lm -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS})
set(libcxx_benchmark_targets)
@@ -220,6 +220,7 @@ set(BENCHMARK_TESTS
lexicographical_compare_three_way.bench.cpp
map.bench.cpp
monotonic_buffer.bench.cpp
+ numeric/gcd.bench.cpp
ordered_set.bench.cpp
shared_mutex_vs_mutex.bench.cpp
stop_token.bench.cpp
>From 6016ce030061a78db010bb6e38a2623f2e708f56 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Fri, 26 Apr 2024 13:35:25 +0200
Subject: [PATCH 4/5] [WIP] Hybrid approach by @ylchapuy
---
libcxx/include/__numeric/gcd_lcm.h | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/libcxx/include/__numeric/gcd_lcm.h b/libcxx/include/__numeric/gcd_lcm.h
index 89d47c1241d5a4..13463950f5bcdc 100644
--- a/libcxx/include/__numeric/gcd_lcm.h
+++ b/libcxx/include/__numeric/gcd_lcm.h
@@ -54,23 +54,31 @@ struct __ct_abs<_Result, _Source, false> {
template <class _Tp>
_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
static_assert((!is_signed<_Tp>::value), "");
- if (__a == 0)
- return __b;
+ if (__a < __b)
+ std::swap(__a, __b);
if (__b == 0)
return __a;
+ __a %= __b; // Make both argument of the same size, and early result in the easy case.
+ if (__a == 0)
+ return __b;
int __az = std::__countr_zero(__a);
int __bz = std::__countr_zero(__b);
int __shift = std::min(__az, __bz);
+ __a >>= __az;
__b >>= __bz;
- while (__a != 0) {
- __a >>= __az;
- _Tp __absdiff = __a > __b ? __a - __b : __b - __a;
- __b = std::min(__a, __b);
- __a = __absdiff;
- __az = std::__countr_zero(__absdiff);
- }
- return __b << __shift;
+ do {
+ _Tp __diff = __a - __b;
+ if (__a > __b) {
+ __a = __b;
+ __b = __diff;
+ } else {
+ __b = __b - __a;
+ }
+ if (__diff != 0)
+ __b >>= std::__countr_zero(__diff);
+ } while (__b != 0);
+ return __a << __shift;
}
template <class _Tp, class _Up>
>From e720ab427e689587ef1df63b0a06df571e4997a1 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Sat, 27 Apr 2024 21:39:04 +0200
Subject: [PATCH 5/5] WIP
---
libcxx/benchmarks/numeric/gcd.bench.cpp | 33 +++++++++++++++++++++++++
libcxx/include/__bit/countr.h | 4 ---
libcxx/include/__numeric/gcd_lcm.h | 3 ++-
3 files changed, 35 insertions(+), 5 deletions(-)
create mode 100644 libcxx/benchmarks/numeric/gcd.bench.cpp
diff --git a/libcxx/benchmarks/numeric/gcd.bench.cpp b/libcxx/benchmarks/numeric/gcd.bench.cpp
new file mode 100644
index 00000000000000..5d8554544bb5fd
--- /dev/null
+++ b/libcxx/benchmarks/numeric/gcd.bench.cpp
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <numeric>
+#include <benchmark/benchmark.h>
+#include <cstring>
+#include <random>
+#include <array>
+
+template <class T>
+static std::array<T, 1000> generate(std::uniform_int_distribution<T> distribution = std::uniform_int_distribution<T>{
+ std::numeric_limits<T>::min(), std::numeric_limits<T>::max()}) {
+ std::mt19937 generator;
+ std::array<T, 1000> result;
+ std::generate_n(result.begin(), result.size(), [&] { return distribution(generator); });
+ return result;
+}
+
+static void bm_gcd(benchmark::State& state) {
+ std::array data = generate<int>();
+ while (state.KeepRunningBatch(data.size()))
+ for (auto v0 : data)
+ for (auto v1 : data)
+ benchmark::DoNotOptimize(std::gcd(v0, v1));
+}
+BENCHMARK(bm_gcd);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index f58ee5ec582848..9e92021fba3551 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -15,7 +15,6 @@
#include <__bit/rotate.h>
#include <__concepts/arithmetic.h>
#include <__config>
-#include <__type_traits/is_unsigned_integer.h>
#include <limits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -39,8 +38,6 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct
return __builtin_ctzll(__x);
}
-#if _LIBCPP_STD_VER >= 17
-
template <class _Tp>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT {
#if __has_builtin(__builtin_ctzg)
@@ -65,7 +62,6 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
}
#endif // __has_builtin(__builtin_ctzg)
}
-#endif
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__numeric/gcd_lcm.h b/libcxx/include/__numeric/gcd_lcm.h
index 13463950f5bcdc..644aa35174a56d 100644
--- a/libcxx/include/__numeric/gcd_lcm.h
+++ b/libcxx/include/__numeric/gcd_lcm.h
@@ -19,6 +19,7 @@
#include <__type_traits/is_same.h>
#include <__type_traits/is_signed.h>
#include <__type_traits/make_unsigned.h>
+#include <__utility/swap.h>
#include <limits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -55,7 +56,7 @@ template <class _Tp>
_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
static_assert((!is_signed<_Tp>::value), "");
if (__a < __b)
- std::swap(__a, __b);
+ swap(__a, __b);
if (__b == 0)
return __a;
__a %= __b; // Make both argument of the same size, and early result in the easy case.
More information about the libcxx-commits
mailing list