[libcxx-commits] [libcxx] [libc++] Implement generic associative container benchmarks (PR #123663)

via libcxx-commits libcxx-commits at lists.llvm.org
Mon Jan 20 11:14:27 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libcxx

Author: Louis Dionne (ldionne)

<details>
<summary>Changes</summary>

This patch implements generic associative container benchmarks for containers with unique keys. In doing so, it replaces the existing std::map benchmarks which were based on the cartesian product infrastructure and were too slow to execute.

These new benchmarks aim to strike a balance between exhaustive coverage of all operations in the most interesting case and fast execution (~40s on my machine).

---

Patch is 54.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123663.diff


5 Files Affected:

- (modified) libcxx/test/benchmarks/GenerateInput.h (+33) 
- (added) libcxx/test/benchmarks/containers/associative_container_benchmarks.h (+533) 
- (added) libcxx/test/benchmarks/containers/flat_map.bench.cpp (+25) 
- (modified) libcxx/test/benchmarks/containers/map.bench.cpp (+8-932) 
- (added) libcxx/test/benchmarks/containers/set.bench.cpp (+23) 


``````````diff
diff --git a/libcxx/test/benchmarks/GenerateInput.h b/libcxx/test/benchmarks/GenerateInput.h
index 6d5c5167e91ed8..c87fd69162e9d4 100644
--- a/libcxx/test/benchmarks/GenerateInput.h
+++ b/libcxx/test/benchmarks/GenerateInput.h
@@ -11,6 +11,7 @@
 
 #include <algorithm>
 #include <climits>
+#include <concepts>
 #include <cstddef>
 #include <random>
 #include <string>
@@ -171,4 +172,36 @@ inline std::vector<const char*> getRandomCStringInputs(std::size_t N) {
   return cinputs;
 }
 
+// Generates values of type T for use as benchmark inputs.
+//
+// The primary template only declares the interface; the member functions are
+// defined by the specializations that follow it in this header.
+template <class T>
+struct Generate {
+  // When the contents don't matter
+  static T arbitrary();
+
+  // Prefer a cheap-to-construct element if possible
+  static T cheap();
+
+  // Prefer an expensive-to-construct element if possible
+  static T expensive();
+
+  // When a randomly generated element is needed. Declared here so that the
+  // primary template advertises the same interface as its specializations,
+  // which generic code relies on (e.g. Generate<Key>::random()).
+  static T random();
+};
+
+// Specialization for integral types: arbitrary(), cheap() and expensive()
+// all yield the same fixed value.
+template <std::integral T>
+struct Generate<T> {
+  static T arbitrary() { return 42; }
+  static T cheap() { return arbitrary(); }
+  static T expensive() { return arbitrary(); }
+
+  // Requests an integer from getRandomInteger, bounded by the smallest and
+  // largest values representable in T.
+  static T random() {
+    using Limits = std::numeric_limits<T>;
+    return getRandomInteger<T>(Limits::min(), Limits::max());
+  }
+};
+
+// Specialization for std::string.
+template <>
+struct Generate<std::string> {
+  // A fixed, short greeting.
+  static std::string arbitrary() { return "hello world"; }
+
+  // A five-character string (presumably short enough to avoid an allocation
+  // on common implementations -- not verified here).
+  static std::string cheap() { return "small"; }
+
+  // A string made of 256 'x' characters.
+  static std::string expensive() { return std::string(256, 'x'); }
+
+  // A string from getRandomString whose length argument is itself obtained
+  // from getRandomInteger with bounds 1 and 1024.
+  static std::string random() { return getRandomString(getRandomInteger<std::size_t>(1, 1024)); }
+};
+
 #endif // BENCHMARK_GENERATE_INPUT_H
diff --git a/libcxx/test/benchmarks/containers/associative_container_benchmarks.h b/libcxx/test/benchmarks/containers/associative_container_benchmarks.h
new file mode 100644
index 00000000000000..0d485be74d498b
--- /dev/null
+++ b/libcxx/test/benchmarks/containers/associative_container_benchmarks.h
@@ -0,0 +1,533 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEST_BENCHMARKS_CONTAINERS_ASSOCIATIVE_CONTAINER_BENCHMARKS_H
+#define TEST_BENCHMARKS_CONTAINERS_ASSOCIATIVE_CONTAINER_BENCHMARKS_H
+
+#include <algorithm>
+#include <iterator>
+#include <map>
+#include <flat_map>
+#include <random>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "../GenerateInput.h"
+
+namespace support {
+
+// Customization point that translates between a container's key_type and
+// value_type. It is only declared here; each supported container provides a
+// specialization with value_from_key() and key_from_value().
+template <class AssociativeContainer>
+struct adapt_operations;
+
+// Adapter for std::set: both conversions simply return a copy of their
+// argument.
+template <class Key>
+struct adapt_operations<std::set<Key>> {
+  using KeyType   = typename std::set<Key>::key_type;
+  using ValueType = typename std::set<Key>::value_type;
+
+  static KeyType key_from_value(ValueType const& element) { return element; }
+  static ValueType value_from_key(KeyType const& key) { return key; }
+};
+
+// Adapter for std::map: a value is a (key, mapped) pair. The mapped part is
+// filled in with Generate<Mapped>::arbitrary(), and the key is recovered from
+// the pair's first member.
+template <class Key, class Mapped>
+struct adapt_operations<std::map<Key, Mapped>> {
+  using KeyType   = typename std::map<Key, Mapped>::key_type;
+  using ValueType = typename std::map<Key, Mapped>::value_type;
+
+  static KeyType key_from_value(ValueType const& entry) { return entry.first; }
+  static ValueType value_from_key(KeyType const& key) { return ValueType(key, Generate<Mapped>::arbitrary()); }
+};
+
+// Adapter for std::flat_map: a value is a (key, mapped) pair whose mapped part
+// is filled in with Generate<V>::arbitrary().
+//
+// The member types are taken from std::flat_map itself rather than std::map:
+// flat_map's value_type is pair<key_type, mapped_type>, whereas map's is
+// pair<const key_type, mapped_type>. Using the container's own types makes
+// ValueType identical to Container::value_type, so callers never go through
+// a converting pair copy when passing values to key_from_value().
+template <class K, class V>
+struct adapt_operations<std::flat_map<K, V>> {
+  using ValueType = typename std::flat_map<K, V>::value_type;
+  using KeyType   = typename std::flat_map<K, V>::key_type;
+  static ValueType value_from_key(KeyType const& k) { return {k, Generate<V>::arbitrary()}; }
+  static KeyType key_from_value(ValueType const& value) { return value.first; }
+};
+
+template <class Container>
+void associative_container_benchmarks(std::string container) {
+  using Key   = typename Container::key_type;
+  using Value = typename Container::value_type;
+
+  auto generate_unique_keys = [=](std::size_t n) {
+    std::set<Key> keys;
+    while (keys.size() < n) {
+      Key k = Generate<Key>::random();
+      keys.insert(k);
+    }
+    return std::vector<Key>(keys.begin(), keys.end());
+  };
+
+  auto add_dummy_mapped_type = [](std::vector<Key> const& keys) {
+    std::vector<Value> kv;
+    for (Key const& k : keys)
+      kv.push_back(adapt_operations<Container>::value_from_key(k));
+    return kv;
+  };
+
+  auto get_key = [](Value const& v) { return adapt_operations<Container>::key_from_value(v); };
+
+  // These benchmarks are structured to perform the operation being benchmarked
+  // a small number of times at each iteration, in order to offset the cost of
+  // PauseTiming() and ResumeTiming().
+  static constexpr std::size_t BatchSize = 10;
+
+  struct ScratchSpace {
+    char storage[sizeof(Container)];
+  };
+
+  /////////////////////////
+  // Constructors
+  /////////////////////////
+  benchmark::RegisterBenchmark(container + "::ctor(const&)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size));
+    Container src(in.begin(), in.end());
+    ScratchSpace c[BatchSize];
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        new (c + i) Container(src);
+        benchmark::DoNotOptimize(c + i);
+        benchmark::ClobberMemory();
+      }
+
+      st.PauseTiming();
+      for (int i = 0; i != BatchSize; ++i) {
+        reinterpret_cast<Container*>(c + i)->~Container();
+      }
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::ctor(iterator, iterator) (unsorted sequence)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::mt19937 randomness;
+    std::vector<Key> keys = generate_unique_keys(size);
+    std::shuffle(keys.begin(), keys.end(), randomness);
+    std::vector<Value> in = add_dummy_mapped_type(keys);
+    ScratchSpace c[BatchSize];
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        new (c + i) Container(in.begin(), in.end());
+        benchmark::DoNotOptimize(c + i);
+        benchmark::ClobberMemory();
+      }
+
+      st.PauseTiming();
+      for (int i = 0; i != BatchSize; ++i) {
+        reinterpret_cast<Container*>(c + i)->~Container();
+      }
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::ctor(iterator, iterator) (sorted sequence)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Key> keys  = generate_unique_keys(size);
+    std::sort(keys.begin(), keys.end());
+    std::vector<Value> in = add_dummy_mapped_type(keys);
+    ScratchSpace c[BatchSize];
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        new (c + i) Container(in.begin(), in.end());
+        benchmark::DoNotOptimize(c + i);
+        benchmark::ClobberMemory();
+      }
+
+      st.PauseTiming();
+      for (int i = 0; i != BatchSize; ++i) {
+        reinterpret_cast<Container*>(c + i)->~Container();
+      }
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  /////////////////////////
+  // Assignment
+  /////////////////////////
+  benchmark::RegisterBenchmark(container + "::operator=(const&)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size));
+    Container src(in.begin(), in.end());
+    Container c[BatchSize];
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i] = src;
+        benchmark::DoNotOptimize(c[i]);
+        benchmark::ClobberMemory();
+      }
+
+      st.PauseTiming();
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].clear();
+      }
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  /////////////////////////
+  // Insertion
+  /////////////////////////
+  benchmark::RegisterBenchmark(container + "::insert(value) (already present)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size));
+    Value to_insert        = in[in.size() / 2]; // pick any existing value
+    std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].insert(to_insert);
+        benchmark::DoNotOptimize(c[i]);
+        benchmark::ClobberMemory();
+      }
+
+      // There is no cleanup to do, since associative containers don't insert
+      // if the key is already present.
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::insert(value) (new value)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size + 1));
+    Value to_insert        = in.back();
+    in.pop_back();
+    std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].insert(to_insert);
+        benchmark::DoNotOptimize(c[i]);
+        benchmark::ClobberMemory();
+      }
+
+      st.PauseTiming();
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].erase(get_key(to_insert));
+      }
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::insert(hint, value) (good hint)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size + 1));
+    Value to_insert        = in.back();
+    in.pop_back();
+
+    std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
+    typename Container::iterator hints[BatchSize];
+    for (int i = 0; i != BatchSize; ++i) {
+      hints[i] = c[i].lower_bound(get_key(to_insert));
+    }
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].insert(hints[i], to_insert);
+        benchmark::DoNotOptimize(c[i]);
+        benchmark::ClobberMemory();
+      }
+
+      st.PauseTiming();
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].erase(get_key(to_insert));
+        hints[i] = c[i].lower_bound(get_key(to_insert)); // refresh hints in case of invalidation
+      }
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::insert(hint, value) (bad hint)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size + 1));
+    Value to_insert        = in.back();
+    in.pop_back();
+    std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].insert(c[i].begin(), to_insert);
+        benchmark::DoNotOptimize(c[i]);
+        benchmark::ClobberMemory();
+      }
+
+      st.PauseTiming();
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].erase(get_key(to_insert));
+      }
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::insert(iterator, iterator) (all new keys)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size + (size / 10)));
+
+    // Populate a container with a small number of elements, that's what containers will start with.
+    std::vector<Value> small;
+    for (int i = 0; i != (size / 10); ++i) {
+      small.push_back(in.back());
+      in.pop_back();
+    }
+    Container c(small.begin(), small.end());
+
+    for (auto _ : st) {
+      c.insert(in.begin(), in.end());
+      benchmark::DoNotOptimize(c);
+      benchmark::ClobberMemory();
+
+      st.PauseTiming();
+      c = Container(small.begin(), small.end());
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::insert(iterator, iterator) (half new keys)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size));
+
+    // Populate a container that already contains half the elements we'll try inserting,
+    // that's what our container will start with.
+    std::vector<Value> small;
+    for (int i = 0; i != size / 2; ++i) {
+      small.push_back(in.at(i * 2));
+    }
+    Container c(small.begin(), small.end());
+
+    for (auto _ : st) {
+      c.insert(in.begin(), in.end());
+      benchmark::DoNotOptimize(c);
+      benchmark::ClobberMemory();
+
+      st.PauseTiming();
+      c = Container(small.begin(), small.end());
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  /////////////////////////
+  // Erasure
+  /////////////////////////
+  benchmark::RegisterBenchmark(container + "::erase(key) (existent)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size));
+    Value element          = in[in.size() / 2]; // pick any element
+    std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].erase(get_key(element));
+        benchmark::DoNotOptimize(c[i]);
+        benchmark::ClobberMemory();
+      }
+
+      st.PauseTiming();
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].insert(element);
+      }
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::erase(key) (non-existent)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size + 1));
+    Value element          = in.back();
+    in.pop_back();
+    Container c(in.begin(), in.end());
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        c.erase(get_key(element));
+        benchmark::DoNotOptimize(c);
+        benchmark::ClobberMemory();
+      }
+
+      // no cleanup required because we erased a non-existent element
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::erase(iterator)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size));
+    Value element          = in[in.size() / 2]; // pick any element
+
+    std::vector<Container> c;
+    std::vector<typename Container::iterator> iterators;
+    for (int i = 0; i != BatchSize; ++i) {
+      c.push_back(Container(in.begin(), in.end()));
+      iterators.push_back(c[i].find(get_key(element)));
+    }
+
+    while (st.KeepRunningBatch(BatchSize)) {
+      for (int i = 0; i != BatchSize; ++i) {
+        c[i].erase(iterators[i]);
+        benchmark::DoNotOptimize(c[i]);
+        benchmark::ClobberMemory();
+      }
+
+      st.PauseTiming();
+      for (int i = 0; i != BatchSize; ++i) {
+        iterators[i] = c[i].insert(element).first;
+      }
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::erase(iterator, iterator) (erase half the container)", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size));
+    Container c(in.begin(), in.end());
+
+    auto first = std::next(c.begin(), c.size() / 4);
+    auto last  = std::next(c.begin(), 3 * (c.size() / 4));
+    for (auto _ : st) {
+      c.erase(first, last);
+      benchmark::DoNotOptimize(c);
+      benchmark::ClobberMemory();
+
+      st.PauseTiming();
+      c     = Container(in.begin(), in.end());
+      first = std::next(c.begin(), c.size() / 4);
+      last  = std::next(c.begin(), 3 * (c.size() / 4));
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  benchmark::RegisterBenchmark(container + "::clear()", [=](auto& st) {
+    const std::size_t size = st.range(0);
+    std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size));
+    Container c(in.begin(), in.end());
+
+    for (auto _ : st) {
+      c.clear();
+      benchmark::DoNotOptimize(c);
+      benchmark::ClobberMemory();
+
+      st.PauseTiming();
+      c = Container(in.begin(), in.end());
+      st.ResumeTiming();
+    }
+  })->Arg(1024);
+
+  /////////////////////////
+  // Query
+  /////////////////////////
+  auto bench_with_existent_key = [=](auto func) {
+    return [=](auto& st) {
+      const std::size_t size = st.range(0);
+      std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size));
+      Value element          = in[in.size() / 2]; // pick any element
+      Container c(in.begin(), in.end());
+
+      while (st.KeepRunningBatch(BatchSize)) {
+        for (int i = 0; i != BatchSize; ++i) {
+          auto result = func(c, element);
+          benchmark::DoNotOptimize(c);
+          benchmark::DoNotOptimize(result);
+          benchmark::ClobberMemory();
+        }
+      }
+    };
+  };
+
+  auto bench_with_nonexistent_key = [=](auto func) {
+    return [=](auto& st) {
+      const std::size_t size = st.range(0);
+      std::vector<Value> in  = add_dummy_mapped_type(generate_unique_keys(size + 1));
+      Value element          = in.back();
+      in.pop_back();
+      Container c(in.begin(), in.end());
+
+      while (st.KeepRunningBatch(BatchSize)) {
+        for (int i = 0; i != BatchSize; ++i) {
+          auto result = func(c, element);
+          benchmark::DoNotOptimize(c);
+          benchmark::DoNotOptimize(result);
+          benchmark::ClobberMemory();
+        }
+      }
+    };
+  };
+
+  benchmark::RegisterBenchmark(
+      container + "::find(key) (existent)",
+      bench_with_existent_key([=](Container const& c, Value const& element) { return c.find(get_key(element)); }))
+      ->Arg(1024);
+  benchmark::RegisterBenchmark(
+      container + "::find(key) (non-existent)",
+      bench_with_nonexistent_key([=](Container const& c, Value const& element) { return c.find(get_key(element)); }))
+      ->Arg(1024);
+
+  benchmark::RegisterBenchmark(
+      container + "::count(key) (existent)",
+      bench_with_existent_key([=](Container const& c, Value const& element) { return c.count(get_key(element)); }))
+      ->Arg(1024);
+  benchmark::RegisterBenchmark(
+      container + "::count(key) (non-existent)",
+      bench_with_nonexistent_key([=](Container const& c, Value const& element) { return c.count(get_key(element)); }))
+      ->Arg(1024);
+
+  benchmark::RegisterBenchmark(
+      container + "::contains(key) (existent)",
+      bench_with_existent_key([=](Container const& c, Value const& element) { return c.contains(get_key(element)); }))
+      ->Arg(1024);
+  benchmark::RegisterBenchmark(
+      container + "::contains(key) (non-existent)",
+      bench_with_nonexistent_key([=](Container const& c, Value const& element) {
+        return c.contains(get_key(element));
+      }))
+      ->Arg(1024);
+
+  benchmark::RegisterBenchmark(
+      container + "::lower_bound(key) (existent)",
+      bench_with_existent_key([=](Container const& c, Value const& element) {
+        return c.lower_bound(get_key(element));
+      }))
+      ->Arg(1024);
+  benchmark::RegisterBenchmark(
+      container + "::lower_bound(key) (non-existent)",
+      bench_with_nonexistent_key([=](Container const& c, Value const& element) {
+        return c.lower_bound(get_key(element));
+      }))
+      ->Arg(1024);
+
+  benchmark::RegisterBenchmark(
+      container + "::upper_bound(key) (existent)",
+      bench_with_existent_key([=](Container const& c, Value const& element) {
+        return c.upper_bound(get_key(element));
+      }))
+      ->Arg(1024);
+  benchmark::RegisterBenchmark(
+      container + "::upper_bound(key) (non-existent)",
+      bench_with_nonexistent_key([=](Container const& c, Value const& element) {
+        return c.upper_bound(get_key(element));
+      }))
+      ->Arg(1024);
+
+  benchmark::RegisterBenchmark(
+      container + "::equal_range(key) (existent)",
+      bench_with_existent_key([=](Container const& c, Value const& element) {
+        return c.equal_range(get_key(element));
+      }))
+      ->Arg(1024);
+  benchmark::RegisterBenchmark(
+      container + "::equal_range(key) (non-existent)",
+      bench_with_nonexistent_key([=](Container const& c, Value const& element) {
+        return c.equal_range(get_key(element));
...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/123663


More information about the libcxx-commits mailing list