[libcxx-commits] [libcxx] [libc++] Add randomize unspecified stability in `__hash_table` (PR #105982)
Arvid Jonasson via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Aug 25 14:07:41 PDT 2024
https://github.com/arvidjonasson updated https://github.com/llvm/llvm-project/pull/105982
>From 83eb7ea93543875cbe816a96a06a7ed4b9be1874 Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 25 Aug 2024 13:51:18 +0200
Subject: [PATCH 1/4] [libc++] Enable randomized element order during rehash in
unordered_{set,map,multiset,multimap} under
_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
- Adds randomization of element order during rehash in unordered containers when the _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY flag is set, similar to existing behavior in sort, nth_element, and partial_sort.
---
.../UnspecifiedBehaviorRandomization.rst | 2 +
libcxx/include/__hash_table | 57 ++++++++++++++
.../unord/hash_table_randomize_order.pass.cpp | 77 +++++++++++++++++++
3 files changed, 136 insertions(+)
create mode 100644 libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp
diff --git a/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst b/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
index 70278798ecf630..3e52a51684507e 100644
--- a/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
+++ b/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
@@ -82,5 +82,7 @@ Currently supported randomization
on the order of the remaining part
* ``std::nth_element``, there is no guarantee on the order from both sides of the
partition
+* ``std::unordered_{set,map}``, there is no guarantee on the order of the elements
+* ``std::unordered_{multiset,multimap}``, there is no guarantee on the order of the elements nor the order of equal elements
Patches welcome.
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d5fbc92a3dfc4e..d6931a81d10a27 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -45,6 +45,11 @@
#include <limits>
#include <new> // __launder
+#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+# include <__debug_utils/randomize_range.h>
+# include <__numeric/iota.h>
+#endif
+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
@@ -980,6 +985,9 @@ private:
template <bool _UniqueKeys>
_LIBCPP_HIDE_FROM_ABI void __do_rehash(size_type __n);
+ template <bool _UniqueKeys>
+ _LIBCPP_HIDE_FROM_ABI void __debug_randomize_order();
+
template <class... _Args>
_LIBCPP_HIDE_FROM_ABI __node_holder __construct_node(_Args&&... __args);
@@ -1702,6 +1710,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n) _LIBCPP_D
template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <bool _UniqueKeys>
void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
+ __debug_randomize_order<_UniqueKeys>();
__pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc();
__bucket_list_.reset(__nbc > 0 ? __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr);
__bucket_list_.get_deleter().size() = __nbc;
@@ -1741,6 +1750,54 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
}
}
+template <class _Tp, class _Hash, class _Equal, class _Alloc>
+template <bool _UniqueKeys>
+void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__debug_randomize_order() {
+#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+ size_type __total_nodes = size();
+ size_type __initialized_nodes = 0;
+
+ // Storage to handle non-assignable, non-default constructible __node_holder.
+ union __nh_storage {
+ __nh_storage() {}
+ ~__nh_storage() {}
+ __node_holder __nh;
+ };
+
+ auto __nh_storage_deleter = [&__initialized_nodes](__nh_storage* __p) {
+ for (size_type __i = 0; __i < __initialized_nodes; ++__i)
+ __p[__i].__nh.~__node_holder();
+ delete[] __p;
+ };
+
+ // Allocate storage for nodes and indices.
+ unique_ptr<__nh_storage[], decltype(__nh_storage_deleter)> __nodes(
+ new __nh_storage[__total_nodes], __nh_storage_deleter);
+ unique_ptr<size_type[]> __randomized_indices(new size_type[__total_nodes]);
+
+ // Move nodes into temporary storage.
+ for (; __initialized_nodes < __total_nodes; ++__initialized_nodes)
+ new (std::addressof(__nodes[__initialized_nodes].__nh)) __node_holder(remove(begin()));
+
+ // Randomize the order of indices.
+ std::iota(__randomized_indices.get(), __randomized_indices.get() + __total_nodes, size_type{0});
+ __debug_randomize_range<_ClassicAlgPolicy>(__randomized_indices.get(), __randomized_indices.get() + __total_nodes);
+
+ // Reinsert nodes into the hash table in randomized order.
+ for (size_type __i = 0; __i < __total_nodes; ++__i) {
+ __node_holder& __nh = __nodes[__randomized_indices[__i]].__nh;
+ __node_pointer __np = __nh->__upcast();
+ if _LIBCPP_CONSTEXPR_SINCE_CXX17 (_UniqueKeys) {
+ __node_insert_unique_perform(__np);
+ } else {
+ __next_pointer __pn = __node_insert_multi_prepare(__np->__hash(), __np->__get_value());
+ __node_insert_multi_perform(__np, __pn);
+ }
+ __nh.release();
+ }
+#endif
+}
+
template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <class _Key>
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
diff --git a/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp b/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp
new file mode 100644
index 00000000000000..1db6857f580efd
--- /dev/null
+++ b/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp
@@ -0,0 +1,77 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Test std::unordered_{set,map,multiset,multimap} randomization
+
+// UNSUPPORTED: c++03
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+
+#include <unordered_set>
+#include <unordered_map>
+#include <cassert>
+#include <vector>
+#include <algorithm>
+
+const int kSize = 128;
+
+template <typename T, typename F>
+T get_random(F get_value) {
+ T v;
+ v.reserve(kSize);
+ for (int i = 0; i < kSize; ++i) {
+ v.insert(get_value());
+ }
+ v.rehash(v.bucket_count() + 1);
+ return v;
+}
+
+template <typename T, typename F>
+T get_deterministic(F get_value) {
+ T v;
+ v.reserve(kSize);
+ for (int i = 0; i < kSize; ++i) {
+ v.insert(get_value());
+ }
+ return v;
+}
+
+template <typename T>
+struct RemoveConst {
+ using type = T;
+};
+
+template <typename T, typename U>
+struct RemoveConst<std::pair<const T, U>> {
+ using type = std::pair<T, U>;
+};
+
+template <typename T, typename F>
+void test_randomization(F get_value) {
+ T t1 = get_deterministic<T>(get_value), t2 = get_random<T>(get_value);
+
+ // Convert pair<const K, V> to pair<K, V> so it can be sorted
+ using U = typename RemoveConst<typename T::value_type>::type;
+
+ std::vector<U> t1v(t1.begin(), t1.end()), t2v(t2.begin(), t2.end());
+
+ assert(t1v != t2v);
+
+ std::sort(t1v.begin(), t1v.end());
+ std::sort(t2v.begin(), t2v.end());
+
+ assert(t1v == t2v);
+}
+
+int main(int, char**) {
+ int i = 0, j = 0;
+ test_randomization<std::unordered_set<int>>([i]() mutable { return i++; });
+ test_randomization<std::unordered_map<int, int>>([i, j]() mutable { return std::make_pair(i++, j++); });
+ test_randomization<std::unordered_multiset<int>>([i]() mutable { return i++ % 32; });
+ test_randomization<std::unordered_multimap<int, int>>([i, j]() mutable { return std::make_pair(i++ % 32, j++); });
+ return 0;
+}
>From 661a7ddf68ac3f2e83fdf4ef807edfb267e9284c Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 25 Aug 2024 17:27:47 +0200
Subject: [PATCH 2/4] Fix recursive inlining by extracting function
`__node_insert_multi_find_insertion_point()` from
`__node_insert_multi_prepare()`. Use user defined allocator for temporary
node and indices buffers in `__debug_randomize_order()`.
---
libcxx/include/__hash_table | 100 +++++++++++++++++++++++-------------
1 file changed, 63 insertions(+), 37 deletions(-)
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d6931a81d10a27..6c35d8b2c16519 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -780,6 +780,7 @@ public:
}
private:
+ _LIBCPP_HIDE_FROM_ABI __next_pointer __node_insert_multi_find_insertion_point(size_t __cp_hash, value_type& __cp_val);
_LIBCPP_HIDE_FROM_ABI __next_pointer __node_insert_multi_prepare(size_t __cp_hash, value_type& __cp_val);
_LIBCPP_HIDE_FROM_ABI void __node_insert_multi_perform(__node_pointer __cp, __next_pointer __pn) _NOEXCEPT;
@@ -1403,22 +1404,13 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique(__node_pointer __
return pair<iterator, bool>(iterator(__existing_node), __inserted);
}
-// Prepare the container for an insertion of the value __cp_val with the hash
-// __cp_hash. This does a lookup into the container to see if __cp_value is
-// already present, and performs a rehash if necessary. Returns a pointer to the
-// last occurrence of __cp_val in the map.
-//
-// Note that this function does forward exceptions if key_eq() throws, and never
-// mutates __value or actually inserts into the map.
+// Does lookup into the container to see if __cp_val is already present, and
+// returns a pointer to the last occurrence of __cp_val in the map.
template <class _Tp, class _Hash, class _Equal, class _Alloc>
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
-__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __cp_hash, value_type& __cp_val) {
- size_type __bc = bucket_count();
- if (size() + 1 > __bc * max_load_factor() || __bc == 0) {
- __rehash_multi(std::max<size_type>(
- 2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor()))));
- __bc = bucket_count();
- }
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_find_insertion_point(
+ size_t __cp_hash, value_type& __cp_val) {
+ size_type __bc = bucket_count();
size_t __chash = std::__constrain_hash(__cp_hash, __bc);
__next_pointer __pn = __bucket_list_[__chash];
if (__pn != nullptr) {
@@ -1442,6 +1434,24 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __c
return __pn;
}
+// Prepare the container for an insertion of the value __cp_val with the hash
+// __cp_hash. This does a lookup into the container to see if __cp_value is
+// already present, and performs a rehash if necessary. Returns a pointer to the
+// last occurrence of __cp_val in the map.
+//
+// Note that this function does forward exceptions if key_eq() throws, and never
+// mutates __value or actually inserts into the map.
+template <class _Tp, class _Hash, class _Equal, class _Alloc>
+typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __cp_hash, value_type& __cp_val) {
+ size_type __bc = bucket_count();
+ if (size() + 1 > __bc * max_load_factor() || __bc == 0) {
+ __rehash_multi(std::max<size_type>(
+ 2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor()))));
+ }
+ return __node_insert_multi_find_insertion_point(__cp_hash, __cp_val);
+}
+
// Insert the node __cp into the container after __pn (which is the last node in
// the bucket that compares equal to __cp). Rehashing, and checking for
// uniqueness has already been performed (in __node_insert_multi_prepare), so
@@ -1754,43 +1764,59 @@ template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <bool _UniqueKeys>
void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__debug_randomize_order() {
#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
- size_type __total_nodes = size();
- size_type __initialized_nodes = 0;
-
- // Storage to handle non-assignable, non-default constructible __node_holder.
- union __nh_storage {
- __nh_storage() {}
- ~__nh_storage() {}
- __node_holder __nh;
+
+ struct __nh_vec {
+ using __nh_allocator = __rebind_alloc<__node_traits, __node_holder>;
+ using __pointer = typename allocator_traits<__nh_allocator>::pointer;
+ __nh_allocator __nh_alloc;
+ __pointer __p = __pointer();
+ size_type __total_nodes = 0;
+ size_type __initialized_nodes = 0;
+ __nh_vec(size_type __n) : __nh_alloc(), __p(__nh_alloc.allocate(__n)), __total_nodes(__n) {}
+ ~__nh_vec() {
+ if (__p) {
+ std::__destroy(__p, __p + __initialized_nodes);
+ __nh_alloc.deallocate(__p, __total_nodes);
+ }
+ }
};
- auto __nh_storage_deleter = [&__initialized_nodes](__nh_storage* __p) {
- for (size_type __i = 0; __i < __initialized_nodes; ++__i)
- __p[__i].__nh.~__node_holder();
- delete[] __p;
+ struct __index_vec {
+ using __index_allocator = __rebind_alloc<__node_traits, size_type>;
+ using __pointer = typename allocator_traits<__index_allocator>::pointer;
+ __index_allocator __index_alloc;
+ __pointer __p = __pointer();
+ size_type __total_indices = 0;
+ size_type __initialized_indices = 0;
+ __index_vec(size_type __n) : __index_alloc(), __p(__index_alloc.allocate(__n)), __total_indices(__n) {}
+ ~__index_vec() {
+ if (__p) {
+ std::__destroy(__p, __p + __initialized_indices);
+ __index_alloc.deallocate(__p, __total_indices);
+ }
+ }
};
- // Allocate storage for nodes and indices.
- unique_ptr<__nh_storage[], decltype(__nh_storage_deleter)> __nodes(
- new __nh_storage[__total_nodes], __nh_storage_deleter);
- unique_ptr<size_type[]> __randomized_indices(new size_type[__total_nodes]);
+ __nh_vec __nhv(size());
+ __index_vec __iv(size());
// Move nodes into temporary storage.
- for (; __initialized_nodes < __total_nodes; ++__initialized_nodes)
- new (std::addressof(__nodes[__initialized_nodes].__nh)) __node_holder(remove(begin()));
+ for (; __nhv.__initialized_nodes < __nhv.__total_nodes; ++__nhv.__initialized_nodes)
+ std::__construct_at(std::addressof(__nhv.__p[__nhv.__initialized_nodes]), remove(begin()));
// Randomize the order of indices.
- std::iota(__randomized_indices.get(), __randomized_indices.get() + __total_nodes, size_type{0});
- __debug_randomize_range<_ClassicAlgPolicy>(__randomized_indices.get(), __randomized_indices.get() + __total_nodes);
+ for (; __iv.__initialized_indices < __nhv.__initialized_nodes; ++__iv.__initialized_indices)
+ std::__construct_at(std::addressof(__iv.__p[__iv.__initialized_indices]), __iv.__initialized_indices);
+ __debug_randomize_range<_ClassicAlgPolicy>(__iv.__p, __iv.__p + __iv.__initialized_indices);
// Reinsert nodes into the hash table in randomized order.
- for (size_type __i = 0; __i < __total_nodes; ++__i) {
- __node_holder& __nh = __nodes[__randomized_indices[__i]].__nh;
+ for (size_type __i = 0; __i < __iv.__initialized_indices; ++__i) {
+ __node_holder& __nh = __nhv.__p[__iv.__p[__i]];
__node_pointer __np = __nh->__upcast();
if _LIBCPP_CONSTEXPR_SINCE_CXX17 (_UniqueKeys) {
__node_insert_unique_perform(__np);
} else {
- __next_pointer __pn = __node_insert_multi_prepare(__np->__hash(), __np->__get_value());
+ __next_pointer __pn = __node_insert_multi_find_insertion_point(__np->__hash(), __np->__get_value());
__node_insert_multi_perform(__np, __pn);
}
__nh.release();
>From 5acc5ba0e007e1e90dd26fe3bb79bafc7c56a3fc Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 25 Aug 2024 17:40:18 +0200
Subject: [PATCH 3/4] Remove unused import.
---
libcxx/include/__hash_table | 1 -
1 file changed, 1 deletion(-)
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index 6c35d8b2c16519..c179bf37267d59 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -47,7 +47,6 @@
#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
# include <__debug_utils/randomize_range.h>
-# include <__numeric/iota.h>
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
>From 9dcaa2c96d99db8bc03797ca7d02f9ec0890c05f Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 25 Aug 2024 23:07:24 +0200
Subject: [PATCH 4/4] Fix import of `_ClassicAlgPolicy` when building with
modules.
---
libcxx/include/__hash_table | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index c179bf37267d59..820356350d63ef 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -10,11 +10,13 @@
#ifndef _LIBCPP___HASH_TABLE
#define _LIBCPP___HASH_TABLE
+#include <__algorithm/iterator_operations.h>
#include <__algorithm/max.h>
#include <__algorithm/min.h>
#include <__assert>
#include <__bit/countl.h>
#include <__config>
+#include <__debug_utils/randomize_range.h>
#include <__functional/hash.h>
#include <__functional/invoke.h>
#include <__iterator/iterator_traits.h>
@@ -45,10 +47,6 @@
#include <limits>
#include <new> // __launder
-#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
-# include <__debug_utils/randomize_range.h>
-#endif
-
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
More information about the libcxx-commits
mailing list