[libcxx-commits] [libcxx] [libc++] Implement std::gcd using the binary version (PR #77747)
    via libcxx-commits 
    libcxx-commits at lists.llvm.org
       
    Thu May  2 22:55:22 PDT 2024
    
    
  
https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/77747
>From f453cfba09ba5b99ddfcc8199997575becced873 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Mon, 13 Mar 2023 21:06:01 +0100
Subject: [PATCH] [libc++] Implement std::gcd using the binary version
The binary version is four times faster than the current implementation
in my setup, and is generally considered a better implementation.
Code inspired by https://en.algorithmica.org/hpc/algorithms/gcd/
which itself is inspired by https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/
Hybrid approach and benchmarks inspired by `ylchapuy <https://github.com/ylchapuy>`.
Fix #77648
---
 libcxx/benchmarks/CMakeLists.txt              |  3 +-
 libcxx/benchmarks/numeric/gcd.bench.cpp       | 53 +++++++++++
 libcxx/include/__numeric/gcd_lcm.h            | 44 +++++++++-
 .../test/libcxx/transitive_includes/cxx03.csv |  1 +
 .../test/libcxx/transitive_includes/cxx11.csv |  1 +
 .../test/libcxx/transitive_includes/cxx14.csv |  1 +
 .../test/libcxx/transitive_includes/cxx17.csv |  1 +
 .../test/libcxx/transitive_includes/cxx20.csv |  1 +
 .../test/libcxx/transitive_includes/cxx26.csv | 23 +++++
 .../numeric.ops/numeric.ops.gcd/gcd.pass.cpp  | 87 ++++++++++++++++++-
 10 files changed, 211 insertions(+), 4 deletions(-)
 create mode 100644 libcxx/benchmarks/numeric/gcd.bench.cpp
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 5dc3be0c367e5e..93b549a316e385 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -122,7 +122,7 @@ endif()
 add_library(           cxx-benchmarks-flags-libcxx INTERFACE)
 target_link_libraries( cxx-benchmarks-flags-libcxx INTERFACE cxx-benchmarks-flags)
 target_compile_options(cxx-benchmarks-flags-libcxx INTERFACE ${SANITIZER_FLAGS} -Wno-user-defined-literals -Wno-suggest-override)
-target_link_options(   cxx-benchmarks-flags-libcxx INTERFACE -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS})
+target_link_options(   cxx-benchmarks-flags-libcxx INTERFACE -lm -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS})
 
 set(libcxx_benchmark_targets)
 
@@ -220,6 +220,7 @@ set(BENCHMARK_TESTS
     lexicographical_compare_three_way.bench.cpp
     map.bench.cpp
     monotonic_buffer.bench.cpp
+    numeric/gcd.bench.cpp
     ordered_set.bench.cpp
     shared_mutex_vs_mutex.bench.cpp
     stop_token.bench.cpp
diff --git a/libcxx/benchmarks/numeric/gcd.bench.cpp b/libcxx/benchmarks/numeric/gcd.bench.cpp
new file mode 100644
index 00000000000000..8ebb15e40038d1
--- /dev/null
+++ b/libcxx/benchmarks/numeric/gcd.bench.cpp
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <array>
+#include <benchmark/benchmark.h>
+#include <cstring>
+#include <numeric>
+#include <random>
+
+template <class T> // Returns 1000 pseudo-random values of type T drawn from `distribution`.
+static std::array<T, 1000> generate(std::uniform_int_distribution<T> distribution = std::uniform_int_distribution<T>{
+                                        std::numeric_limits<T>::min(), std::numeric_limits<T>::max()}) { // default: full range of T
+  std::mt19937 generator; // default-seeded, so every call produces the same sequence
+  std::array<T, 1000> result;
+  std::generate_n(result.begin(), result.size(), [&] { return distribution(generator); }); // one draw per slot
+  return result;
+}
+
+static void bm_gcd_random(benchmark::State& state) { // Times std::gcd over every ordered pair of 1000 random ints.
+  std::array data = generate<int>();
+  while (state.KeepRunningBatch(data.size() * data.size())) // one batch = one gcd per (v0, v1) pair
+    for (auto v0 : data)
+      for (auto v1 : data)
+        benchmark::DoNotOptimize(std::gcd(v0, v1));
+}
+BENCHMARK(bm_gcd_random);
+
+static void bm_gcd_trivial(benchmark::State& state) { // Times std::gcd on one fixed pair whose second operand is 1.
+  int lhs = ~static_cast<int>(0), rhs = 1; // lhs has every bit set
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(lhs); // presumably keeps the operands opaque so the call is not constant-folded
+    benchmark::DoNotOptimize(rhs);
+    benchmark::DoNotOptimize(std::gcd(lhs, rhs));
+  }
+}
+BENCHMARK(bm_gcd_trivial);
+
+static void bm_gcd_complex(benchmark::State& state) { // Times std::gcd on two consecutive Fibonacci numbers.
+  long long lhs = 2971215073, rhs = 1836311903; // F(47), F(46); F(47) exceeds INT_MAX, hence long long
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(lhs);
+    benchmark::DoNotOptimize(rhs);
+    benchmark::DoNotOptimize(std::gcd(lhs, rhs));
+  }
+}
+BENCHMARK(bm_gcd_complex);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__numeric/gcd_lcm.h b/libcxx/include/__numeric/gcd_lcm.h
index 48df2338051e29..5d735a51a47ebc 100644
--- a/libcxx/include/__numeric/gcd_lcm.h
+++ b/libcxx/include/__numeric/gcd_lcm.h
@@ -10,7 +10,9 @@
 #ifndef _LIBCPP___NUMERIC_GCD_LCM_H
 #define _LIBCPP___NUMERIC_GCD_LCM_H
 
+#include <__algorithm/min.h>
 #include <__assert>
+#include <__bit/countr.h>
 #include <__config>
 #include <__type_traits/common_type.h>
 #include <__type_traits/is_integral.h>
@@ -50,9 +52,47 @@ struct __ct_abs<_Result, _Source, false> {
 };
 
 template <class _Tp>
-_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __m, _Tp __n) {
+_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
   static_assert((!is_signed<_Tp>::value), "");
-  return __n == 0 ? __m : std::__gcd<_Tp>(__n, __m % __n);
+
+  // From: https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor
+  //
+  // If a power of two divides both numbers, we can factor it out.
+  // - gcd( 2^x * a, 2^x * b) = 2^x * gcd(a, b)
+  //
+  // If exactly one of the numbers is even, its factors of two can be dropped.
+  // - if a, b are odd, then gcd(2^x * a, b) = gcd(a, b)
+  //
+  // The rest is the standard gcd algorithm, with subtraction used instead of modulo.
+
+  if (__a < __b) {
+    _Tp __tmp = __b;
+    __b       = __a;
+    __a       = __tmp;
+  }
+  if (__b == 0)
+    return __a;
+  __a %= __b; // Make both arguments the same size, and get an early result in the easy case.
+  if (__a == 0)
+    return __b;
+
+  int __az    = std::__countr_zero(__a);
+  int __bz    = std::__countr_zero(__b);
+  int __shift = std::min(__az, __bz); // power of two common to both operands, restored on return
+  __a >>= __az; // __a and __b are nonzero here, so both become odd
+  __b >>= __bz;
+  do {
+    _Tp __diff = __a - __b; // wraps when __a < __b, but keeps the trailing-zero count of the true difference
+    if (__a > __b) { // keep the smaller value in __a and the difference in __b
+      __a = __b;
+      __b = __diff;
+    } else {
+      __b = __b - __a;
+    }
+    if (__diff != 0)
+      __b >>= std::__countr_zero(__diff); // odd - odd is even: strip the twos so __b is odd again
+  } while (__b != 0);
+  return __a << __shift;
 }
 
 template <class _Tp, class _Up>
diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv
index cf0af3b8bb392d..92601fab5b7730 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx03.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv
@@ -570,6 +570,7 @@ numeric cstddef
 numeric cstdint
 numeric execution
 numeric functional
+numeric initializer_list
 numeric iterator
 numeric limits
 numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv
index f514ee3028f64a..c05eb42deb9a1f 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx11.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv
@@ -575,6 +575,7 @@ numeric cstddef
 numeric cstdint
 numeric execution
 numeric functional
+numeric initializer_list
 numeric iterator
 numeric limits
 numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv
index 43e3f996adba36..09252b7b7d2dba 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx14.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv
@@ -578,6 +578,7 @@ numeric cstddef
 numeric cstdint
 numeric execution
 numeric functional
+numeric initializer_list
 numeric iterator
 numeric limits
 numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index 43e3f996adba36..09252b7b7d2dba 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -578,6 +578,7 @@ numeric cstddef
 numeric cstdint
 numeric execution
 numeric functional
+numeric initializer_list
 numeric iterator
 numeric limits
 numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv
index 8463f17db411ab..ce4ccc3d116153 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx20.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv
@@ -589,6 +589,7 @@ numeric cstddef
 numeric cstdint
 numeric execution
 numeric functional
+numeric initializer_list
 numeric iterator
 numeric limits
 numeric new
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index 62d931c0eebade..f68249aeec78c9 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -176,6 +176,29 @@ experimental/simd limits
 experimental/type_traits initializer_list
 experimental/type_traits type_traits
 experimental/utility utility
+experimental/vector experimental/memory_resource
+experimental/vector vector
+ext/hash_map algorithm
+ext/hash_map cmath
+ext/hash_map cstddef
+ext/hash_map cstdint
+ext/hash_map cstring
+ext/hash_map functional
+ext/hash_map initializer_list
+ext/hash_map limits
+ext/hash_map new
+ext/hash_map stdexcept
+ext/hash_map string
+ext/hash_set algorithm
+ext/hash_set cmath
+ext/hash_set cstddef
+ext/hash_set cstdint
+ext/hash_set cstring
+ext/hash_set functional
+ext/hash_set initializer_list
+ext/hash_set limits
+ext/hash_set new
+ext/hash_set string
 filesystem compare
 filesystem cstddef
 filesystem cstdint
diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
index 831c226f9c8ea1..ebcc71036fddaf 100644
--- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
@@ -17,6 +17,7 @@
 #include <cassert>
 #include <climits>
 #include <cstdint>
+#include <random>
 #include <type_traits>
 
 #include "test_macros.h"
@@ -48,6 +49,72 @@ constexpr bool test0(int in1, int in2, int out)
     return true;
 }
 
+template <typename T>
+T basic_gcd_(T m, T n) { // Reference implementation: recursive Euclidean algorithm.
+  return n == 0 ? m : basic_gcd_<T>(n, m % n);
+}
+
+template <typename T>
+T basic_gcd(T m, T n) { // Reference gcd: runs basic_gcd_ on the magnitudes of m and n as unsigned values.
+  using Tp = std::make_unsigned_t<T>;
+  if (m < 0 && m != std::numeric_limits<T>::min()) // -min() is not representable in T; the cast below yields its magnitude
+    m = -m;
+  if (n < 0 && n != std::numeric_limits<T>::min())
+    n = -n;
+  return basic_gcd_(static_cast<Tp>(m), static_cast<Tp>(n));
+}
+
+template <typename Input>
+void do_fuzzy_tests() { // Cross-checks std::gcd against basic_gcd on pseudo-random non-negative inputs.
+  std::mt19937 gen(1938);
+  using DistInt = std::conditional_t<sizeof(Input) == 1, int, Input>; // 8-bit IntType is undefined: [rand.req.genl]
+  std::uniform_int_distribution<DistInt> distrib(0, std::numeric_limits<Input>::max()); // same [0, max] range as before
+  constexpr int nb_rounds = 10000;
+  for (int i = 0; i < nb_rounds; ++i) {
+    Input n = static_cast<Input>(distrib(gen)); // always in range: the distribution is capped at Input's max
+    Input m = static_cast<Input>(distrib(gen));
+    assert(std::gcd(n, m) == basic_gcd(n, m));
+  }
+}
+
+template <typename Input>
+void do_limit_tests() { // Checks std::gcd against basic_gcd for every ordered pair of the boundary values below.
+  Input inputs[] = {
+      std::numeric_limits<Input>::min(),
+      std::numeric_limits<Input>::min() + 1,
+      std::numeric_limits<Input>::min() + 2,
+      std::numeric_limits<Input>::max(),
+      std::numeric_limits<Input>::max() - 1,
+      std::numeric_limits<Input>::max() - 2,
+      0,
+      1,
+      2,
+      3,
+      4,
+      5,
+      6,
+      7,
+      8,
+      9,
+      10,
+      (Input)-1, // for unsigned Input these wrap to values at or just below max()
+      (Input)-2,
+      (Input)-3,
+      (Input)-4,
+      (Input)-5,
+      (Input)-6,
+      (Input)-7,
+      (Input)-8,
+      (Input)-9,
+      (Input)-10,
+  };
+
+  for (auto n : inputs) { // all ordered pairs, including each value paired with itself
+    for (auto m : inputs) {
+      assert(std::gcd(n, m) == basic_gcd(n, m));
+    }
+  }
+}
 
 template <typename Input1, typename Input2 = Input1>
 constexpr bool do_test(int = 0)
@@ -143,5 +210,23 @@ int main(int argc, char**)
     assert(res == 2);
     }
 
-  return 0;
+    do_fuzzy_tests<std::int8_t>();
+    do_fuzzy_tests<std::int16_t>();
+    do_fuzzy_tests<std::int32_t>();
+    do_fuzzy_tests<std::int64_t>();
+    do_fuzzy_tests<std::uint8_t>();
+    do_fuzzy_tests<std::uint16_t>();
+    do_fuzzy_tests<std::uint32_t>();
+    do_fuzzy_tests<std::uint64_t>();
+
+    do_limit_tests<std::int8_t>();
+    do_limit_tests<std::int16_t>();
+    do_limit_tests<std::int32_t>();
+    do_limit_tests<std::int64_t>();
+    do_limit_tests<std::uint8_t>();
+    do_limit_tests<std::uint16_t>();
+    do_limit_tests<std::uint32_t>();
+    do_limit_tests<std::uint64_t>();
+
+    return 0;
 }
    
    
More information about the libcxx-commits
mailing list