[libcxx-commits] [libcxx] [libc++] Add randomize unspecified stability in `__hash_table` (PR #105982)

Arvid Jonasson via libcxx-commits libcxx-commits at lists.llvm.org
Sun Aug 25 05:13:10 PDT 2024


https://github.com/arvidjonasson created https://github.com/llvm/llvm-project/pull/105982

Adds functionality requested in #102303.
Expands on functionality of https://libcxx.llvm.org/DesignDocs/UnspecifiedBehaviorRandomization.html.

- Add randomization of element order during rehash in unordered containers (`std::unordered_{set,map,multiset,multimap}`) when the `_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY` flag is set, similar to existing behavior in `std::sort`, `std::nth_element`, and `std::partial_sort`.
  - For `std::unordered_{multiset,multimap}`, both the order of the equal ranges and the order of the elements within each equal range are shuffled.
  - For `std::unordered_{set,map}`, the order of the elements is shuffled.
- Add test `libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp` to assert that randomization works correctly.

>From e60b49aea1ef150b05e5708ba375457e3b176d68 Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 25 Aug 2024 13:51:18 +0200
Subject: [PATCH] [libc++] Enable randomized element order during rehash in
 unordered_{set,map,multiset,multimap} under
 _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY

- Adds randomization of element order during rehash in unordered containers when the _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY flag is set, similar to existing behavior in sort, nth_element, and partial_sort.
---
 .../UnspecifiedBehaviorRandomization.rst      |  2 +
 libcxx/include/__hash_table                   | 57 +++++++++++++
 .../unord/hash_table_randomize_order.pass.cpp | 79 +++++++++++++++++++
 3 files changed, 138 insertions(+)
 create mode 100644 libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp

diff --git a/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst b/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
index 70278798ecf630..3e52a51684507e 100644
--- a/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
+++ b/libcxx/docs/DesignDocs/UnspecifiedBehaviorRandomization.rst
@@ -82,5 +82,7 @@ Currently supported randomization
    on the order of the remaining part
 * ``std::nth_element``, there is no guarantee on the order from both sides of the
    partition
+* ``std::unordered_{set,map}``, there is no guarantee on the order of the elements
+* ``std::unordered_{multiset,multimap}``, there is no guarantee on the order of the elements or on the relative order of equivalent elements
 
 Patches welcome.
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d5fbc92a3dfc4e..d6931a81d10a27 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -45,6 +45,11 @@
 #include <limits>
 #include <new> // __launder
 
+#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+#  include <__debug_utils/randomize_range.h>
+#  include <__numeric/iota.h>
+#endif
+
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
@@ -980,6 +985,9 @@ private:
   template <bool _UniqueKeys>
   _LIBCPP_HIDE_FROM_ABI void __do_rehash(size_type __n);
 
+  template <bool _UniqueKeys>
+  _LIBCPP_HIDE_FROM_ABI void __debug_randomize_order();
+
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node(_Args&&... __args);
 
@@ -1702,6 +1710,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n) _LIBCPP_D
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 template <bool _UniqueKeys>
 void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
+  __debug_randomize_order<_UniqueKeys>();
   __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc();
   __bucket_list_.reset(__nbc > 0 ? __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr);
   __bucket_list_.get_deleter().size() = __nbc;
@@ -1741,6 +1750,54 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
   }
 }
 
+// Removes every node and reinserts them in random order when _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY is defined.
+template <class _Tp, class _Hash, class _Equal, class _Alloc>
+template <bool _UniqueKeys>
+void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__debug_randomize_order() {
+#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+  size_type __total_nodes       = size();
+  size_type __initialized_nodes = 0; // holders constructed so far; tells the deleter how many to destroy
+  // Union storage for the non-assignable, non-default constructible __node_holder; the member is managed manually.
+  union __nh_storage {
+    __nh_storage() {}
+    ~__nh_storage() {}
+    __node_holder __nh;
+  };
+
+  auto __nh_storage_deleter = [&__initialized_nodes](__nh_storage* __p) {
+    for (size_type __i = 0; __i < __initialized_nodes; ++__i)
+      __p[__i].__nh.~__node_holder();
+    delete[] __p;
+  };
+  // Allocate storage for nodes and indices.
+  unique_ptr<__nh_storage[], decltype(__nh_storage_deleter)> __nodes(
+      new __nh_storage[__total_nodes], __nh_storage_deleter);
+  unique_ptr<size_type[]> __randomized_indices(new size_type[__total_nodes]);
+
+  // Move nodes into temporary storage (global placement new via void*, so no other operator new can be selected).
+  for (; __initialized_nodes < __total_nodes; ++__initialized_nodes)
+    ::new (static_cast<void*>(std::addressof(__nodes[__initialized_nodes].__nh))) __node_holder(remove(begin()));
+
+  // Randomize the order of indices (the helper is called std-qualified so ADL is never involved).
+  std::iota(__randomized_indices.get(), __randomized_indices.get() + __total_nodes, size_type{0});
+  std::__debug_randomize_range<_ClassicAlgPolicy>(
+      __randomized_indices.get(), __randomized_indices.get() + __total_nodes);
+
+  // Reinsert nodes into the hash table in randomized order.
+  for (size_type __i = 0; __i < __total_nodes; ++__i) {
+    __node_holder& __nh = __nodes[__randomized_indices[__i]].__nh;
+    __node_pointer __np = __nh->__upcast();
+    if _LIBCPP_CONSTEXPR_SINCE_CXX17 (_UniqueKeys) {
+      __node_insert_unique_perform(__np);
+    } else {
+      __next_pointer __pn = __node_insert_multi_prepare(__np->__hash(), __np->__get_value());
+      __node_insert_multi_perform(__np, __pn);
+    }
+    __nh.release(); // give up ownership so the reinserted node is not destroyed with the holder
+  }
+#endif
+}
+
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 template <class _Key>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
diff --git a/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp b/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp
new file mode 100644
index 00000000000000..bec3c5d353f83f
--- /dev/null
+++ b/libcxx/test/libcxx/containers/unord/hash_table_randomize_order.pass.cpp
@@ -0,0 +1,79 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Test std::unordered_{set,map,multiset,multimap} randomization
+
+// UNSUPPORTED: c++03
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+
+#include <unordered_set>
+#include <unordered_map>
+#include <cassert>
+#include <vector>
+#include <algorithm>
+
+const int kSize = 128; // number of insert() calls made on each container under test
+
+// Builds a container from kSize generated values, then requests a rehash, which is where the order gets randomized.
+template <typename Container, typename Generator>
+Container get_random(Generator get_value) {
+  Container result;
+  result.reserve(kSize);
+  for (int remaining = kSize; remaining > 0; --remaining)
+    result.insert(get_value());
+  result.rehash(result.bucket_count() + 1);
+  return result;
+}
+
+// Builds a container from kSize generated values; no rehash is requested after the insertions.
+template <typename Container, typename Generator>
+Container get_deterministic(Generator get_value) {
+  Container result;
+  result.reserve(kSize);
+  for (int remaining = kSize; remaining > 0; --remaining)
+    result.insert(get_value());
+  return result;
+}
+
+template <typename Value>
+struct RemoveConst {
+  typedef Value type; // primary template: the value type is used unchanged
+};
+// Specialization for pair<const Key, Mapped>: yields the same pair type with a non-const first member.
+template <typename Key, typename Mapped>
+struct RemoveConst<std::pair<const Key, Mapped>> {
+  typedef std::pair<Key, Mapped> type;
+};
+
+// Checks that a rehashed container holds the same elements as a non-rehashed one, but iterates in a different order.
+template <typename Container, typename Generator>
+void test_randomization(Generator get_value) {
+  Container baseline = get_deterministic<Container>(get_value);
+  Container shuffled = get_random<Container>(get_value);
+
+  // Convert pair<const K, V> to pair<K, V> so the copied elements can be sorted.
+  typedef typename RemoveConst<typename Container::value_type>::type Element;
+  std::vector<Element> baseline_order(baseline.begin(), baseline.end());
+  std::vector<Element> shuffled_order(shuffled.begin(), shuffled.end());
+  assert(baseline_order != shuffled_order); // iteration order must differ after the rehash
+
+  std::sort(baseline_order.begin(), baseline_order.end());
+  std::sort(shuffled_order.begin(), shuffled_order.end());
+  assert(baseline_order == shuffled_order); // ...while the contents stay the same
+}
+
+int main(int, char**) {
+  // The counters are captured by value, so every copy of a generator restarts from the same state.
+  int k = 0, v = 0;
+  test_randomization<std::unordered_set<int>>([k]() mutable { return k++; });
+  test_randomization<std::unordered_map<int, int>>([k, v]() mutable { return std::make_pair(k++, v++); });
+  // Keys wrap modulo 32, which gives the multi-containers repeated keys.
+  test_randomization<std::unordered_multiset<int>>([k]() mutable { return k++ % 32; });
+  test_randomization<std::unordered_multimap<int, int>>([k, v]() mutable { return std::make_pair(k++ % 32, v++); });
+  return 0;
+}



More information about the libcxx-commits mailing list