[libcxx-commits] [libcxx] [libc++] Add randomize unspecified stability in `__hash_table` (PR #105982)
Arvid Jonasson via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Aug 25 05:36:44 PDT 2024
https://github.com/arvidjonasson updated https://github.com/llvm/llvm-project/pull/105982
>From 83eb7ea93543875cbe816a96a06a7ed4b9be1874 Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 25 Aug 2024 13:51:18 +0200
Subject: [PATCH] [libc++] Enable randomized element order during rehash in
unordered_{set,map,multiset,multimap} under
_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
- Adds randomization of element order during rehash in unordered containers when the _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY flag is set, similar to existing behavior in sort, nth_element, and partial_sort.
---
.../UnspecifiedBehaviorRandomization.rst | 2 +
libcxx/include/__hash_table | 57 ++++++++++++++
.../unord/hash_table_randomize_order.pass.cpp | 77 +++++++++++++++++++
3 files changed, 136 insertions(+)
create mode 100644 libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp
diff --git a/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst b/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
index 70278798ecf630..3e52a51684507e 100644
--- a/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
+++ b/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
@@ -82,5 +82,7 @@ Currently supported randomization
on the order of the remaining part
* ``std::nth_element``, there is no guarantee on the order from both sides of the
partition
+* ``std::unordered_{set,map}``, there is no guarantee on the order of the elements
+* ``std::unordered_{multiset,multimap}``, there is no guarantee on the order of the elements, except that rehashing is required to preserve the relative order of elements with equivalent keys
Patches welcome.
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d5fbc92a3dfc4e..d6931a81d10a27 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -45,6 +45,11 @@
#include <limits>
#include <new> // __launder
+#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+# include <__debug_utils/randomize_range.h>
+# include <__numeric/iota.h>
+#endif
+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
@@ -980,6 +985,9 @@ private:
template <bool _UniqueKeys>
_LIBCPP_HIDE_FROM_ABI void __do_rehash(size_type __n);
+ template <bool _UniqueKeys>
+ _LIBCPP_HIDE_FROM_ABI void __debug_randomize_order();
+
template <class... _Args>
_LIBCPP_HIDE_FROM_ABI __node_holder __construct_node(_Args&&... __args);
@@ -1702,6 +1710,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n) _LIBCPP_D
template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <bool _UniqueKeys>
void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
+ __debug_randomize_order<_UniqueKeys>();
__pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc();
__bucket_list_.reset(__nbc > 0 ? __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr);
__bucket_list_.get_deleter().size() = __nbc;
@@ -1741,6 +1750,54 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
}
}
+// Debug-only helper: relinks the nodes in a shuffled order so that code relying on the unspecified
+// iteration order of unordered containers is exposed. The body is empty unless
+// _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY is defined.
+//
+// [unord.req.general] requires rehashing to preserve the relative order of equivalent elements in
+// unordered_multiset and unordered_multimap, so whole runs of equivalent elements are shuffled as
+// units and reinserted in their original internal order. With unique keys every node is its own run.
+template <class _Tp, class _Hash, class _Equal, class _Alloc>
+template <bool _UniqueKeys>
+void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__debug_randomize_order() {
+#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+  size_type __total_nodes = size();
+  if (__total_nodes < 2)
+    return; // Nothing to permute; also skips the allocations below for empty tables.
+  size_type __initialized_nodes = 0;
+
+  // Storage to handle non-assignable, non-default constructible __node_holder.
+  union __nh_storage {
+    __nh_storage() {}
+    ~__nh_storage() {}
+    __node_holder __nh;
+  };
+
+  // Destroys only the holders that were actually constructed, then frees the array.
+  auto __nh_storage_deleter = [&__initialized_nodes](__nh_storage* __p) {
+    for (size_type __i = 0; __i < __initialized_nodes; ++__i)
+      __p[__i].__nh.~__node_holder();
+    delete[] __p;
+  };
+
+  // Allocate storage for the nodes, the run boundaries (plus one end sentinel) and the run order.
+  unique_ptr<__nh_storage[], decltype(__nh_storage_deleter)> __nodes(
+      new __nh_storage[__total_nodes], __nh_storage_deleter);
+  unique_ptr<size_type[]> __run_begin(new size_type[__total_nodes + 1]);
+  unique_ptr<size_type[]> __randomized_runs(new size_type[__total_nodes]);
+
+  // Move nodes into temporary storage. Global placement new on a void* cannot pick up an overload.
+  for (; __initialized_nodes < __total_nodes; ++__initialized_nodes)
+    ::new (static_cast<void*>(std::addressof(__nodes[__initialized_nodes].__nh))) __node_holder(remove(begin()));
+
+  // Record where each run of equivalent elements starts; equivalent elements are adjacent in
+  // iteration order, so comparing each node with its predecessor is sufficient.
+  size_type __total_runs = 0;
+  for (size_type __i = 0; __i < __total_nodes; ++__i) {
+    bool __starts_run = _UniqueKeys || __i == 0;
+    if (!__starts_run) {
+      __node_pointer __prev = __nodes[__i - 1].__nh->__upcast();
+      __node_pointer __curr = __nodes[__i].__nh->__upcast();
+      __starts_run =
+          __prev->__hash() != __curr->__hash() || !key_eq()(__prev->__get_value(), __curr->__get_value());
+    }
+    if (__starts_run)
+      __run_begin[__total_runs++] = __i;
+  }
+  __run_begin[__total_runs] = __total_nodes;
+
+  // Randomize the order of the runs. The call is qualified so that ADL cannot interfere.
+  std::iota(__randomized_runs.get(), __randomized_runs.get() + __total_runs, size_type{0});
+  std::__debug_randomize_range<_ClassicAlgPolicy>(__randomized_runs.get(), __randomized_runs.get() + __total_runs);
+
+  // Reinsert the runs in randomized order, keeping the original order inside each run.
+  for (size_type __r = 0; __r < __total_runs; ++__r) {
+    size_type __run = __randomized_runs[__r];
+    for (size_type __i = __run_begin[__run]; __i < __run_begin[__run + 1]; ++__i) {
+      __node_holder& __nh = __nodes[__i].__nh;
+      __node_pointer __np = __nh->__upcast();
+      if _LIBCPP_CONSTEXPR_SINCE_CXX17 (_UniqueKeys) {
+        __node_insert_unique_perform(__np);
+      } else {
+        __next_pointer __pn = __node_insert_multi_prepare(__np->__hash(), __np->__get_value());
+        __node_insert_multi_perform(__np, __pn);
+      }
+      __nh.release(); // The table owns the node again.
+    }
+  }
+#endif
+}
+
template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <class _Key>
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
diff --git a/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp b/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp
new file mode 100644
index 00000000000000..1db6857f580efd
--- /dev/null
+++ b/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp
@@ -0,0 +1,77 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Test std::unordered_{set,map,multiset,multimap} randomization
+
+// UNSUPPORTED: c++03
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+
+#include <unordered_set>
+#include <unordered_map>
+#include <cassert>
+#include <vector>
+#include <algorithm>
+
+const int kSize = 128;
+
+// Fills a container with kSize generated values, then rehashes it while it is populated.
+template <typename T, typename F>
+T get_random(F get_value) {
+  T container;
+  container.reserve(kSize);
+  for (int inserted = 0; inserted != kSize; ++inserted)
+    container.insert(get_value());
+  container.rehash(container.bucket_count() + 1); // ask for more buckets than currently present
+  return container;
+}
+
+// Fills a container with kSize generated values; no rehash is requested once it is populated.
+template <typename T, typename F>
+T get_deterministic(F get_value) {
+  T container;
+  container.reserve(kSize);
+  for (int inserted = 0; inserted != kSize; ++inserted)
+    container.insert(get_value());
+  return container;
+}
+
+// Maps std::pair<const Key, Mapped> to std::pair<Key, Mapped>; any other type is left unchanged.
+template <typename Value>
+struct RemoveConst {
+  using type = Value;
+};
+template <typename Key, typename Mapped>
+struct RemoveConst<std::pair<const Key, Mapped>> {
+  using type = std::pair<Key, Mapped>;
+};
+
+// Asserts that the rehashed container iterates in a different order than the baseline one
+// while both still hold exactly the same elements.
+template <typename T, typename F>
+void test_randomization(F get_value) {
+  // pair<const K, V> cannot be assigned, so the elements are copied as pair<K, V> to allow sorting.
+  using Element = typename RemoveConst<typename T::value_type>::type;
+
+  const T baseline = get_deterministic<T>(get_value);
+  const T rehashed = get_random<T>(get_value);
+  std::vector<Element> baseline_order(baseline.begin(), baseline.end());
+  std::vector<Element> rehashed_order(rehashed.begin(), rehashed.end());
+  assert(baseline_order != rehashed_order); // the iteration order differs...
+  std::sort(baseline_order.begin(), baseline_order.end());
+  std::sort(rehashed_order.begin(), rehashed_order.end());
+  assert(baseline_order == rehashed_order); // ...but the contents are the same
+}
+
+int main(int, char**) {
+  int k = 0, v = 0; // captured by copy below, so every generator starts counting from 0
+  test_randomization<std::unordered_set<int>>([k]() mutable { return k++; });
+  test_randomization<std::unordered_map<int, int>>([k, v]() mutable { return std::make_pair(k++, v++); });
+  test_randomization<std::unordered_multiset<int>>([k]() mutable { return k++ % 32; });
+  test_randomization<std::unordered_multimap<int, int>>([k, v]() mutable { return std::make_pair(k++ % 32, v++); });
+  return 0;
+}
More information about the libcxx-commits
mailing list