[libcxx-commits] [libcxx] [libc++] Add randomize unspecified stability in `__hash_table` (PR #105982)

Arvid Jonasson via libcxx-commits libcxx-commits at lists.llvm.org
Sun Aug 25 14:07:41 PDT 2024


https://github.com/arvidjonasson updated https://github.com/llvm/llvm-project/pull/105982

>From 83eb7ea93543875cbe816a96a06a7ed4b9be1874 Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 25 Aug 2024 13:51:18 +0200
Subject: [PATCH 1/4] [libc++] Enable randomized element order during rehash in
 unordered_{set,map,multiset,multimap} under
 _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY

- Adds randomization of element order during rehash in unordered containers when the _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY flag is set, similar to existing behavior in sort, nth_element, and partial_sort.
---
 .../UnspecifiedBehaviorRandomization.rst      |  2 +
 libcxx/include/__hash_table                   | 57 ++++++++++++++
 .../unord/hash_table_randomize_order.pass.cpp | 77 +++++++++++++++++++
 3 files changed, 136 insertions(+)
 create mode 100644 libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp

diff --git a/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst b/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
index 70278798ecf630..3e52a51684507e 100644
--- a/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
+++ b/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
@@ -82,5 +82,7 @@ Currently supported randomization
    on the order of the remaining part
 * ``std::nth_element``, there is no guarantee on the order from both sides of the
    partition
+* ``std::unordered_{set,map}``, there is no guarantee on the order of the elements
+* ``std::unordered_{multiset,multimap}``, there is no guarantee on the order of the elements nor the order of equal elements
 
 Patches welcome.
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d5fbc92a3dfc4e..d6931a81d10a27 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -45,6 +45,11 @@
 #include <limits>
 #include <new> // __launder
 
+#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+#  include <__debug_utils/randomize_range.h>
+#  include <__numeric/iota.h>
+#endif
+
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
@@ -980,6 +985,9 @@ private:
   template <bool _UniqueKeys>
   _LIBCPP_HIDE_FROM_ABI void __do_rehash(size_type __n);
 
+  template <bool _UniqueKeys>
+  _LIBCPP_HIDE_FROM_ABI void __debug_randomize_order();
+
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node(_Args&&... __args);
 
@@ -1702,6 +1710,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n) _LIBCPP_D
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 template <bool _UniqueKeys>
 void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
+  __debug_randomize_order<_UniqueKeys>();
   __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc();
   __bucket_list_.reset(__nbc > 0 ? __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr);
   __bucket_list_.get_deleter().size() = __nbc;
@@ -1741,6 +1750,54 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
   }
 }
 
+template <class _Tp, class _Hash, class _Equal, class _Alloc>
+template <bool _UniqueKeys>
+void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__debug_randomize_order() {
+#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+  size_type __total_nodes       = size();
+  size_type __initialized_nodes = 0;
+
+  // Storage to handle non-assignable, non-default constructible __node_holder.
+  union __nh_storage {
+    __nh_storage() {}
+    ~__nh_storage() {}
+    __node_holder __nh;
+  };
+
+  auto __nh_storage_deleter = [&__initialized_nodes](__nh_storage* __p) {
+    for (size_type __i = 0; __i < __initialized_nodes; ++__i)
+      __p[__i].__nh.~__node_holder();
+    delete[] __p;
+  };
+
+  // Allocate storage for nodes and indices.
+  unique_ptr<__nh_storage[], decltype(__nh_storage_deleter)> __nodes(
+      new __nh_storage[__total_nodes], __nh_storage_deleter);
+  unique_ptr<size_type[]> __randomized_indices(new size_type[__total_nodes]);
+
+  // Move nodes into temporary storage.
+  for (; __initialized_nodes < __total_nodes; ++__initialized_nodes)
+    new (std::addressof(__nodes[__initialized_nodes].__nh)) __node_holder(remove(begin()));
+
+  // Randomize the order of indices.
+  std::iota(__randomized_indices.get(), __randomized_indices.get() + __total_nodes, size_type{0});
+  __debug_randomize_range<_ClassicAlgPolicy>(__randomized_indices.get(), __randomized_indices.get() + __total_nodes);
+
+  // Reinsert nodes into the hash table in randomized order.
+  for (size_type __i = 0; __i < __total_nodes; ++__i) {
+    __node_holder& __nh = __nodes[__randomized_indices[__i]].__nh;
+    __node_pointer __np = __nh->__upcast();
+    if _LIBCPP_CONSTEXPR_SINCE_CXX17 (_UniqueKeys) {
+      __node_insert_unique_perform(__np);
+    } else {
+      __next_pointer __pn = __node_insert_multi_prepare(__np->__hash(), __np->__get_value());
+      __node_insert_multi_perform(__np, __pn);
+    }
+    __nh.release();
+  }
+#endif
+}
+
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 template <class _Key>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
diff --git a/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp b/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp
new file mode 100644
index 00000000000000..1db6857f580efd
--- /dev/null
+++ b/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp
@@ -0,0 +1,77 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Test std::unordered_{set,map,multiset,multimap} randomization
+
+// UNSUPPORTED: c++03
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+
+#include <unordered_set>
+#include <unordered_map>
+#include <cassert>
+#include <vector>
+#include <algorithm>
+
+const int kSize = 128;
+
+template <typename T, typename F>
+T get_random(F get_value) {
+  T v;
+  v.reserve(kSize);
+  for (int i = 0; i < kSize; ++i) {
+    v.insert(get_value());
+  }
+  v.rehash(v.bucket_count() + 1);
+  return v;
+}
+
+template <typename T, typename F>
+T get_deterministic(F get_value) {
+  T v;
+  v.reserve(kSize);
+  for (int i = 0; i < kSize; ++i) {
+    v.insert(get_value());
+  }
+  return v;
+}
+
+template <typename T>
+struct RemoveConst {
+  using type = T;
+};
+
+template <typename T, typename U>
+struct RemoveConst<std::pair<const T, U>> {
+  using type = std::pair<T, U>;
+};
+
+template <typename T, typename F>
+void test_randomization(F get_value) {
+  T t1 = get_deterministic<T>(get_value), t2 = get_random<T>(get_value);
+
+  // Convert pair<const K, V> to pair<K, V> so it can be sorted
+  using U = typename RemoveConst<typename T::value_type>::type;
+
+  std::vector<U> t1v(t1.begin(), t1.end()), t2v(t2.begin(), t2.end());
+
+  assert(t1v != t2v);
+
+  std::sort(t1v.begin(), t1v.end());
+  std::sort(t2v.begin(), t2v.end());
+
+  assert(t1v == t2v);
+}
+
+int main(int, char**) {
+  int i = 0, j = 0;
+  test_randomization<std::unordered_set<int>>([i]() mutable { return i++; });
+  test_randomization<std::unordered_map<int, int>>([i, j]() mutable { return std::make_pair(i++, j++); });
+  test_randomization<std::unordered_multiset<int>>([i]() mutable { return i++ % 32; });
+  test_randomization<std::unordered_multimap<int, int>>([i, j]() mutable { return std::make_pair(i++ % 32, j++); });
+  return 0;
+}

>From 661a7ddf68ac3f2e83fdf4ef807edfb267e9284c Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 25 Aug 2024 17:27:47 +0200
Subject: [PATCH 2/4] Fix recursive inlining by extracting function
 `__node_insert_multi_find_insertion_point()` from
 `__node_insert_multi_prepare()`. Use user defined allocator for temporary
 node and indices buffers in `__debug_randomize_order()`.

---
 libcxx/include/__hash_table | 100 +++++++++++++++++++++++-------------
 1 file changed, 63 insertions(+), 37 deletions(-)

diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d6931a81d10a27..6c35d8b2c16519 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -780,6 +780,7 @@ public:
   }
 
 private:
+  _LIBCPP_HIDE_FROM_ABI __next_pointer __node_insert_multi_find_insertion_point(size_t __cp_hash, value_type& __cp_val);
   _LIBCPP_HIDE_FROM_ABI __next_pointer __node_insert_multi_prepare(size_t __cp_hash, value_type& __cp_val);
   _LIBCPP_HIDE_FROM_ABI void __node_insert_multi_perform(__node_pointer __cp, __next_pointer __pn) _NOEXCEPT;
 
@@ -1403,22 +1404,13 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique(__node_pointer __
   return pair<iterator, bool>(iterator(__existing_node), __inserted);
 }
 
-// Prepare the container for an insertion of the value __cp_val with the hash
-// __cp_hash. This does a lookup into the container to see if __cp_value is
-// already present, and performs a rehash if necessary. Returns a pointer to the
-// last occurrence of __cp_val in the map.
-//
-// Note that this function does forward exceptions if key_eq() throws, and never
-// mutates __value or actually inserts into the map.
+// Does lookup into the container to see if __cp_val is already present, and
+// returns a pointer to the last occurrence of __cp_val in the map.
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
-__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __cp_hash, value_type& __cp_val) {
-  size_type __bc = bucket_count();
-  if (size() + 1 > __bc * max_load_factor() || __bc == 0) {
-    __rehash_multi(std::max<size_type>(
-        2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor()))));
-    __bc = bucket_count();
-  }
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_find_insertion_point(
+    size_t __cp_hash, value_type& __cp_val) {
+  size_type __bc      = bucket_count();
   size_t __chash      = std::__constrain_hash(__cp_hash, __bc);
   __next_pointer __pn = __bucket_list_[__chash];
   if (__pn != nullptr) {
@@ -1442,6 +1434,24 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __c
   return __pn;
 }
 
+// Prepare the container for an insertion of the value __cp_val with the hash
+// __cp_hash. This does a lookup into the container to see if __cp_value is
+// already present, and performs a rehash if necessary. Returns a pointer to the
+// last occurrence of __cp_val in the map.
+//
+// Note that this function does forward exceptions if key_eq() throws, and never
+// mutates __value or actually inserts into the map.
+template <class _Tp, class _Hash, class _Equal, class _Alloc>
+typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __cp_hash, value_type& __cp_val) {
+  size_type __bc = bucket_count();
+  if (size() + 1 > __bc * max_load_factor() || __bc == 0) {
+    __rehash_multi(std::max<size_type>(
+        2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor()))));
+  }
+  return __node_insert_multi_find_insertion_point(__cp_hash, __cp_val);
+}
+
 // Insert the node __cp into the container after __pn (which is the last node in
 // the bucket that compares equal to __cp). Rehashing, and checking for
 // uniqueness has already been performed (in __node_insert_multi_prepare), so
@@ -1754,43 +1764,59 @@ template <class _Tp, class _Hash, class _Equal, class _Alloc>
 template <bool _UniqueKeys>
 void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__debug_randomize_order() {
 #ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
-  size_type __total_nodes       = size();
-  size_type __initialized_nodes = 0;
-
-  // Storage to handle non-assignable, non-default constructible __node_holder.
-  union __nh_storage {
-    __nh_storage() {}
-    ~__nh_storage() {}
-    __node_holder __nh;
+
+  struct __nh_vec {
+    using __nh_allocator = __rebind_alloc<__node_traits, __node_holder>;
+    using __pointer      = typename allocator_traits<__nh_allocator>::pointer;
+    __nh_allocator __nh_alloc;
+    __pointer __p                 = __pointer();
+    size_type __total_nodes       = 0;
+    size_type __initialized_nodes = 0;
+    __nh_vec(size_type __n) : __nh_alloc(), __p(__nh_alloc.allocate(__n)), __total_nodes(__n) {}
+    ~__nh_vec() {
+      if (__p) {
+        std::__destroy(__p, __p + __initialized_nodes);
+        __nh_alloc.deallocate(__p, __total_nodes);
+      }
+    }
   };
 
-  auto __nh_storage_deleter = [&__initialized_nodes](__nh_storage* __p) {
-    for (size_type __i = 0; __i < __initialized_nodes; ++__i)
-      __p[__i].__nh.~__node_holder();
-    delete[] __p;
+  struct __index_vec {
+    using __index_allocator = __rebind_alloc<__node_traits, size_type>;
+    using __pointer         = typename allocator_traits<__index_allocator>::pointer;
+    __index_allocator __index_alloc;
+    __pointer __p                   = __pointer();
+    size_type __total_indices       = 0;
+    size_type __initialized_indices = 0;
+    __index_vec(size_type __n) : __index_alloc(), __p(__index_alloc.allocate(__n)), __total_indices(__n) {}
+    ~__index_vec() {
+      if (__p) {
+        std::__destroy(__p, __p + __initialized_indices);
+        __index_alloc.deallocate(__p, __total_indices);
+      }
+    }
   };
 
-  // Allocate storage for nodes and indices.
-  unique_ptr<__nh_storage[], decltype(__nh_storage_deleter)> __nodes(
-      new __nh_storage[__total_nodes], __nh_storage_deleter);
-  unique_ptr<size_type[]> __randomized_indices(new size_type[__total_nodes]);
+  __nh_vec __nhv(size());
+  __index_vec __iv(size());
 
   // Move nodes into temporary storage.
-  for (; __initialized_nodes < __total_nodes; ++__initialized_nodes)
-    new (std::addressof(__nodes[__initialized_nodes].__nh)) __node_holder(remove(begin()));
+  for (; __nhv.__initialized_nodes < __nhv.__total_nodes; ++__nhv.__initialized_nodes)
+    std::__construct_at(std::addressof(__nhv.__p[__nhv.__initialized_nodes]), remove(begin()));
 
   // Randomize the order of indices.
-  std::iota(__randomized_indices.get(), __randomized_indices.get() + __total_nodes, size_type{0});
-  __debug_randomize_range<_ClassicAlgPolicy>(__randomized_indices.get(), __randomized_indices.get() + __total_nodes);
+  for (; __iv.__initialized_indices < __nhv.__initialized_nodes; ++__iv.__initialized_indices)
+    std::__construct_at(std::addressof(__iv.__p[__iv.__initialized_indices]), __iv.__initialized_indices);
+  __debug_randomize_range<_ClassicAlgPolicy>(__iv.__p, __iv.__p + __iv.__initialized_indices);
 
   // Reinsert nodes into the hash table in randomized order.
-  for (size_type __i = 0; __i < __total_nodes; ++__i) {
-    __node_holder& __nh = __nodes[__randomized_indices[__i]].__nh;
+  for (size_type __i = 0; __i < __iv.__initialized_indices; ++__i) {
+    __node_holder& __nh = __nhv.__p[__iv.__p[__i]];
     __node_pointer __np = __nh->__upcast();
     if _LIBCPP_CONSTEXPR_SINCE_CXX17 (_UniqueKeys) {
       __node_insert_unique_perform(__np);
     } else {
-      __next_pointer __pn = __node_insert_multi_prepare(__np->__hash(), __np->__get_value());
+      __next_pointer __pn = __node_insert_multi_find_insertion_point(__np->__hash(), __np->__get_value());
       __node_insert_multi_perform(__np, __pn);
     }
     __nh.release();

>From 5acc5ba0e007e1e90dd26fe3bb79bafc7c56a3fc Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 25 Aug 2024 17:40:18 +0200
Subject: [PATCH 3/4] Remove unused import.

---
 libcxx/include/__hash_table | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index 6c35d8b2c16519..c179bf37267d59 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -47,7 +47,6 @@
 
 #ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
 #  include <__debug_utils/randomize_range.h>
-#  include <__numeric/iota.h>
 #endif
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)

>From 9dcaa2c96d99db8bc03797ca7d02f9ec0890c05f Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 25 Aug 2024 23:07:24 +0200
Subject: [PATCH 4/4] Fix import of `_ClassicAlgPolicy` when building with
 modules.

---
 libcxx/include/__hash_table | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index c179bf37267d59..820356350d63ef 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -10,11 +10,13 @@
 #ifndef _LIBCPP___HASH_TABLE
 #define _LIBCPP___HASH_TABLE
 
+#include <__algorithm/iterator_operations.h>
 #include <__algorithm/max.h>
 #include <__algorithm/min.h>
 #include <__assert>
 #include <__bit/countl.h>
 #include <__config>
+#include <__debug_utils/randomize_range.h>
 #include <__functional/hash.h>
 #include <__functional/invoke.h>
 #include <__iterator/iterator_traits.h>
@@ -45,10 +47,6 @@
 #include <limits>
 #include <new> // __launder
 
-#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
-#  include <__debug_utils/randomize_range.h>
-#endif
-
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif



More information about the libcxx-commits mailing list