[libcxx-commits] [libcxx] Insert new nodes at the beginning of equal range in std::unordered_multimap (PR #104702)

via libcxx-commits libcxx-commits at lists.llvm.org
Sun Aug 18 06:01:17 PDT 2024


https://github.com/arvidjonasson created https://github.com/llvm/llvm-project/pull/104702

- Changed the insertion and emplacement logic in `std::unordered_multimap` to insert new nodes at the beginning of an equal range instead of the back.
- This change addresses a performance issue where inserting at the back degraded performance to that of a linked list for large numbers of elements with equal keys.
- The new approach improves performance significantly, fixing a bug reported in 2013 (Bug 16747 in LLVM Bugzilla) that was previously marked as WONTFIX.
- Note: This change may break code that relies on the order of elements within each equal range, although such order is not guaranteed by the standard.
- This new behavior mimics the insertion logic of the `libstdc++` implementation.
- Added a benchmark to measure the performance improvements of the new insertion logic.

>From eb582960ab4c408e55b0765804f64e4fed58d458 Mon Sep 17 00:00:00 2001
From: Arvid Jonasson <jonassonarvid02 at gmail.com>
Date: Sun, 18 Aug 2024 14:50:58 +0200
Subject: [PATCH] Insert new nodes at the beginning of equal range in
 std::unordered_multimap

- Changed the insertion and emplacement logic in `std::unordered_multimap` to insert new nodes at the beginning of an equal range instead of the back.
- This change addresses a performance issue where inserting at the back degraded performance to that of a linked list for large numbers of elements with equal keys.
- The new approach improves performance significantly, fixing a bug reported in 2013 (Bug 16747 in LLVM Bugzilla) that was previously marked as WONTFIX.
- Note: This change may break code that relies on the order of elements within each equal range, although such order is not guaranteed by the standard.
- This new behavior mimics the insertion logic of the `libstdc++` implementation.
- Added a benchmark to measure the performance improvements of the new insertion logic.
---
 libcxx/include/__hash_table                   |  27 +-
 libcxx/test/benchmarks/CMakeLists.txt         |   1 +
 libcxx/test/benchmarks/ContainerBenchmarks.h  |  16 +-
 .../unordered_multimap_operations.bench.cpp   | 260 ++++++++++++++++++
 .../emplace_hint.pass.cpp                     |  16 --
 5 files changed, 286 insertions(+), 34 deletions(-)
 create mode 100644 libcxx/test/benchmarks/unordered_multimap_operations.bench.cpp

diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d5fbc92a3dfc4e..d4c18599c92509 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -1398,7 +1398,8 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique(__node_pointer __
 // Prepare the container for an insertion of the value __cp_val with the hash
 // __cp_hash. This does a lookup into the container to see if __cp_value is
 // already present, and performs a rehash if necessary. Returns a pointer to the
-// last occurrence of __cp_val in the map.
+// node before the insertion point, or nullptr if the insertion point is the
+// first node in the bucket.
 //
 // Note that this function does forward exceptions if key_eq() throws, and never
 // mutates __value or actually inserts into the map.
@@ -1414,28 +1415,20 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __c
   size_t __chash      = std::__constrain_hash(__cp_hash, __bc);
   __next_pointer __pn = __bucket_list_[__chash];
   if (__pn != nullptr) {
-    for (bool __found = false;
-         __pn->__next_ != nullptr && std::__constrain_hash(__pn->__next_->__hash(), __bc) == __chash;
+    for (; __pn->__next_ != nullptr && std::__constrain_hash(__pn->__next_->__hash(), __bc) == __chash;
          __pn = __pn->__next_) {
-      //      __found    key_eq()     action
-      //      false       false       loop
-      //      true        true        loop
-      //      false       true        set __found to true
-      //      true        false       break
-      if (__found !=
-          (__pn->__next_->__hash() == __cp_hash && key_eq()(__pn->__next_->__upcast()->__get_value(), __cp_val))) {
-        if (!__found)
-          __found = true;
-        else
-          break;
-      }
+      // Iterate until either:
+      //   __pn->__next_ is nullptr, or
+      //   __pn->__next_ is in a different bucket, or
+      //   __pn->__next_ has the same key as __cp_val
+      if (__pn->__next_->__hash() == __cp_hash && key_eq()(__pn->__next_->__upcast()->__get_value(), __cp_val))
+        break;
     }
   }
   return __pn;
 }
 
-// Insert the node __cp into the container after __pn (which is the last node in
-// the bucket that compares equal to __cp). Rehashing, and checking for
+// Insert the node __cp into the container after __pn. Rehashing, and checking for
 // uniqueness has already been performed (in __node_insert_multi_prepare), so
 // all we need to do is update the bucket and size(). Assumes that __cp->__hash
 // is up-to-date.
diff --git a/libcxx/test/benchmarks/CMakeLists.txt b/libcxx/test/benchmarks/CMakeLists.txt
index 616cf0ff8d2374..3d51a3599a1f8a 100644
--- a/libcxx/test/benchmarks/CMakeLists.txt
+++ b/libcxx/test/benchmarks/CMakeLists.txt
@@ -168,6 +168,7 @@ set(BENCHMARK_TESTS
     stringstream.bench.cpp
     system_error.bench.cpp
     to_chars.bench.cpp
+    unordered_multimap_operations.bench.cpp
     unordered_set_operations.bench.cpp
     util_smartptr.bench.cpp
     variant_visit_1.bench.cpp
diff --git a/libcxx/test/benchmarks/ContainerBenchmarks.h b/libcxx/test/benchmarks/ContainerBenchmarks.h
index 744505b439985b..bcfe8c9f9e0f16 100644
--- a/libcxx/test/benchmarks/ContainerBenchmarks.h
+++ b/libcxx/test/benchmarks/ContainerBenchmarks.h
@@ -99,7 +99,21 @@ void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
   while (st.KeepRunning()) {
     c.clear();
     for (auto it = in.begin(); it != end; ++it) {
-      benchmark::DoNotOptimize(&(*c.insert(*it).first));
+      benchmark::DoNotOptimize(c.insert(*it));
+    }
+    benchmark::ClobberMemory();
+  }
+}
+
+template <class Container, class... GenInputs>
+void BM_EmplaceValue(benchmark::State& st, Container c, GenInputs... gen) {
+  auto num  = st.range(0);
+  auto args = std::make_tuple(gen(num)...);
+  while (st.KeepRunning()) {
+    c.clear();
+    for (decltype(num) i = 0; i < num; ++i) {
+      benchmark::DoNotOptimize(
+          std::apply([&c, i](auto&&... args) -> decltype(auto) { return c.emplace(args[i]...); }, args));
     }
     benchmark::ClobberMemory();
   }
diff --git a/libcxx/test/benchmarks/unordered_multimap_operations.bench.cpp b/libcxx/test/benchmarks/unordered_multimap_operations.bench.cpp
new file mode 100644
index 00000000000000..0d921896901c0f
--- /dev/null
+++ b/libcxx/test/benchmarks/unordered_multimap_operations.bench.cpp
@@ -0,0 +1,260 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+#include <cstdlib>
+#include <functional>
+#include <unordered_map>
+#include <vector>
+#include <utility>
+
+#include "benchmark/benchmark.h"
+
+#include "ContainerBenchmarks.h"
+#include "GenerateInput.h"
+#include "test_macros.h"
+
+using namespace ContainerBenchmarks;
+
+constexpr size_t TestNumInputs = 1024;
+
+template <class GenInputs>
+inline auto getRandomWithMaxCardinality(GenInputs genInputs, size_t maxCardinality) {
+  return [genInputs = std::forward<GenInputs>(genInputs), maxCardinality](size_t N) {
+    auto possibleInputs = genInputs(maxCardinality);
+    decltype(possibleInputs) inputs;
+    size_t minIdx = 0, maxIdx = possibleInputs.size() - 1;
+    for (size_t i = 0; i < N; ++i) {
+      inputs.push_back(possibleInputs[getRandomInteger(minIdx, maxIdx)]);
+    }
+    return inputs;
+  };
+}
+
+template <class GenInputsFirst, class GenInputsSecond>
+inline auto genRandomPairInputs(GenInputsFirst genFirst, GenInputsSecond genSecond) {
+  return [genFirst  = std::forward< GenInputsFirst >(genFirst),
+          genSecond = std::forward< GenInputsSecond >(genSecond)](size_t N) {
+    auto first  = genFirst(N);
+    auto second = genSecond(N);
+    std::vector<std::pair<typename decltype(first)::value_type, typename decltype(second)::value_type>> inputs;
+    inputs.reserve(N);
+    for (size_t i = 0; i < N; ++i) {
+      inputs.emplace_back(std::move(first[i]), std::move(second[i]));
+    }
+    return inputs;
+  };
+}
+
+//----------------------------------------------------------------------------//
+//                       BM_InsertValue
+// ---------------------------------------------------------------------------//
+
+BENCHMARK_CAPTURE(BM_InsertValue,
+                  unordered_multimap_uint32,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  genRandomPairInputs(getRandomIntegerInputs<uint32_t>, getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_InsertValue,
+                  unordered_multimap_uint32_sorted,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  genRandomPairInputs(getSortedIntegerInputs<uint32_t>, getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+// String //
+BENCHMARK_CAPTURE(BM_InsertValue,
+                  unordered_multimap_string,
+                  std::unordered_multimap<std::string, uint32_t>{},
+                  genRandomPairInputs(getRandomStringInputs, getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+// Prefixed String //
+BENCHMARK_CAPTURE(BM_InsertValue,
+                  unordered_multimap_prefixed_string,
+                  std::unordered_multimap<std::string, uint32_t>{},
+                  genRandomPairInputs(getPrefixedRandomStringInputs, getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+// Random with max cardinalities [512, 128, 32, 8, 1] //
+BENCHMARK_CAPTURE(BM_InsertValue,
+                  unordered_multimap_uint32_max_cardinality_512,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  genRandomPairInputs(getRandomWithMaxCardinality(getRandomIntegerInputs<uint32_t>, 512),
+                                      getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_InsertValue,
+                  unordered_multimap_uint32_max_cardinality_128,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  genRandomPairInputs(getRandomWithMaxCardinality(getRandomIntegerInputs<uint32_t>, 128),
+                                      getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_InsertValue,
+                  unordered_multimap_uint32_max_cardinality_32,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  genRandomPairInputs(getRandomWithMaxCardinality(getRandomIntegerInputs<uint32_t>, 32),
+                                      getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_InsertValue,
+                  unordered_multimap_uint32_max_cardinality_8,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  genRandomPairInputs(getRandomWithMaxCardinality(getRandomIntegerInputs<uint32_t>, 8),
+                                      getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_InsertValue,
+                  unordered_multimap_uint32_max_cardinality_1,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  genRandomPairInputs(getRandomWithMaxCardinality(getRandomIntegerInputs<uint32_t>, 1),
+                                      getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(
+    BM_InsertValue,
+    unordered_multimap_string_max_cardinality_512,
+    std::unordered_multimap<std::string, uint32_t>{},
+    genRandomPairInputs(getRandomWithMaxCardinality(getRandomStringInputs, 512), getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(
+    BM_InsertValue,
+    unordered_multimap_string_max_cardinality_128,
+    std::unordered_multimap<std::string, uint32_t>{},
+    genRandomPairInputs(getRandomWithMaxCardinality(getRandomStringInputs, 128), getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(
+    BM_InsertValue,
+    unordered_multimap_string_max_cardinality_32,
+    std::unordered_multimap<std::string, uint32_t>{},
+    genRandomPairInputs(getRandomWithMaxCardinality(getRandomStringInputs, 32), getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(
+    BM_InsertValue,
+    unordered_multimap_string_max_cardinality_8,
+    std::unordered_multimap<std::string, uint32_t>{},
+    genRandomPairInputs(getRandomWithMaxCardinality(getRandomStringInputs, 8), getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(
+    BM_InsertValue,
+    unordered_multimap_string_max_cardinality_1,
+    std::unordered_multimap<std::string, uint32_t>{},
+    genRandomPairInputs(getRandomWithMaxCardinality(getRandomStringInputs, 1), getRandomIntegerInputs<uint32_t>))
+    ->Arg(TestNumInputs);
+
+//----------------------------------------------------------------------------//
+//                       BM_EmplaceValue
+// ---------------------------------------------------------------------------//
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_uint32,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  getRandomIntegerInputs<uint32_t>,
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_uint32_sorted,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  getSortedIntegerInputs<uint32_t>,
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+// String //
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_string,
+                  std::unordered_multimap<std::string, uint32_t>{},
+                  getRandomStringInputs,
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+// Prefixed String //
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_prefixed_string,
+                  std::unordered_multimap<std::string, uint32_t>{},
+                  getPrefixedRandomStringInputs,
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+// Random with max cardinalities [512, 128, 32, 8] //
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_uint32_max_cardinality_512,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  getRandomWithMaxCardinality(getRandomIntegerInputs<uint32_t>, 512),
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_uint32_max_cardinality_128,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  getRandomWithMaxCardinality(getRandomIntegerInputs<uint32_t>, 128),
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_uint32_max_cardinality_32,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  getRandomWithMaxCardinality(getRandomIntegerInputs<uint32_t>, 32),
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_uint32_max_cardinality_8,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  getRandomWithMaxCardinality(getRandomIntegerInputs<uint32_t>, 8),
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_uint32_max_cardinality_1,
+                  std::unordered_multimap<uint32_t, uint32_t>{},
+                  getRandomWithMaxCardinality(getRandomIntegerInputs<uint32_t>, 1),
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_string_max_cardinality_512,
+                  std::unordered_multimap<std::string, uint32_t>{},
+                  getRandomWithMaxCardinality(getRandomStringInputs, 512),
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_string_max_cardinality_128,
+                  std::unordered_multimap<std::string, uint32_t>{},
+                  getRandomWithMaxCardinality(getRandomStringInputs, 128),
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_string_max_cardinality_32,
+                  std::unordered_multimap<std::string, uint32_t>{},
+                  getRandomWithMaxCardinality(getRandomStringInputs, 32),
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_string_max_cardinality_8,
+                  std::unordered_multimap<std::string, uint32_t>{},
+                  getRandomWithMaxCardinality(getRandomStringInputs, 8),
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_EmplaceValue,
+                  unordered_multimap_string_max_cardinality_1,
+                  std::unordered_multimap<std::string, uint32_t>{},
+                  getRandomWithMaxCardinality(getRandomStringInputs, 1),
+                  getRandomIntegerInputs<uint32_t>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.pass.cpp
index 2a6e3f60ca6b57..ec31c2be1fa747 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.pass.cpp
@@ -43,20 +43,12 @@ int main(int, char**)
         assert(c.size() == 2);
         assert(r->first == 3);
         assert(r->second == Emplaceable(5, 6));
-        LIBCPP_ASSERT(r == std::next(c.begin()));
 
         r = c.emplace_hint(r, std::piecewise_construct, std::forward_as_tuple(3),
                                                         std::forward_as_tuple(6, 7));
         assert(c.size() == 3);
         assert(r->first == 3);
         assert(r->second == Emplaceable(6, 7));
-        LIBCPP_ASSERT(r == std::next(c.begin()));
-        r = c.begin();
-        assert(r->first == 3);
-        LIBCPP_ASSERT(r->second == Emplaceable());
-        r = std::next(r, 2);
-        assert(r->first == 3);
-        LIBCPP_ASSERT(r->second == Emplaceable(5, 6));
     }
     {
         typedef std::unordered_multimap<int, Emplaceable, std::hash<int>, std::equal_to<int>,
@@ -74,20 +66,12 @@ int main(int, char**)
         assert(c.size() == 2);
         assert(r->first == 3);
         assert(r->second == Emplaceable(5, 6));
-        LIBCPP_ASSERT(r == std::next(c.begin()));
 
         r = c.emplace_hint(r, std::piecewise_construct, std::forward_as_tuple(3),
                                                         std::forward_as_tuple(6, 7));
         assert(c.size() == 3);
         assert(r->first == 3);
         assert(r->second == Emplaceable(6, 7));
-        LIBCPP_ASSERT(r == std::next(c.begin()));
-        r = c.begin();
-        assert(r->first == 3);
-        LIBCPP_ASSERT(r->second == Emplaceable());
-        r = std::next(r, 2);
-        assert(r->first == 3);
-        LIBCPP_ASSERT(r->second == Emplaceable(5, 6));
     }
 
   return 0;



More information about the libcxx-commits mailing list