[libcxx-commits] [libcxx] [libcxx] Avoid hash key in __hash_table::find() if no buckets yet. (PR #126837)
via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Feb 13 16:56:23 PST 2025
https://github.com/xbcnn updated https://github.com/llvm/llvm-project/pull/126837
>From 0f93eba81b388211c80da1eeb2f9ad35a9f75947 Mon Sep 17 00:00:00 2001
From: yangxiaobing <yangxiaobing at jwzg.com>
Date: Tue, 11 Feb 2025 18:16:09 +0800
Subject: [PATCH 1/3] [libcxx] Avoid hash key in __hash_table::find() if no
buckets yet.
If the hash table has no buckets yet, it is empty and find() returns end()
immediately. Computing the hash of the key is then unnecessary and can be
skipped, since it can be expensive for some key types, such as long strings.
This is a small optimization, but it is useful in cases like a checklist
(implemented as an unordered_set) that is empty most of the time.
---
libcxx/include/__hash_table | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d7b312f8774fc..a1d06d07f7c8d 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -1771,9 +1771,9 @@ template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <class _Key>
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
__hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) {
- size_t __hash = hash_function()(__k);
size_type __bc = bucket_count();
if (__bc != 0) {
+ size_t __hash = hash_function()(__k);
size_t __chash = std::__constrain_hash(__hash, __bc);
__next_pointer __nd = __bucket_list_[__chash];
if (__nd != nullptr) {
@@ -1792,9 +1792,9 @@ template <class _Tp, class _Hash, class _Equal, class _Alloc>
template <class _Key>
typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::const_iterator
__hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) const {
- size_t __hash = hash_function()(__k);
size_type __bc = bucket_count();
if (__bc != 0) {
+ size_t __hash = hash_function()(__k);
size_t __chash = std::__constrain_hash(__hash, __bc);
__next_pointer __nd = __bucket_list_[__chash];
if (__nd != nullptr) {
>From a2f7b3c549b63ce9669c025c863980da27481b50 Mon Sep 17 00:00:00 2001
From: yangxiaobing <yangxiaobing at jwzg.com>
Date: Thu, 13 Feb 2025 16:33:13 +0800
Subject: [PATCH 2/3] Add benchmarks.
---
.../associative/hash_table_find.bench.cpp | 70 +++++++++++++++++++
1 file changed, 70 insertions(+)
create mode 100644 libcxx/test/benchmarks/containers/associative/hash_table_find.bench.cpp
diff --git a/libcxx/test/benchmarks/containers/associative/hash_table_find.bench.cpp b/libcxx/test/benchmarks/containers/associative/hash_table_find.bench.cpp
new file mode 100644
index 0000000000000..e6f0ae79caef3
--- /dev/null
+++ b/libcxx/test/benchmarks/containers/associative/hash_table_find.bench.cpp
@@ -0,0 +1,70 @@
+#include <unordered_set>
+#include <string>
+#include <random>
+#include <vector>
+
+#include "../../GenerateInput.h"
+#include "benchmark/benchmark.h"
+
+// Generates and caches random lowercase strings of 32 to 128 chars each
+struct LongStringGenerator {
+ static std::vector<std::string> cached_strings;
+
+ static void ensure_strings(size_t count) {
+ cached_strings.clear();
+
+ std::mt19937_64 gen(42); // Fixed seed for reproducibility
+ std::uniform_int_distribution<size_t> len_dist(32, 128);
+
+ cached_strings.reserve(count);
+ for (size_t i = 0; i < count; i++) {
+ std::string str(len_dist(gen), 0);
+ for (char& c : str) {
+ c = 'a' + (gen() % 26);
+ }
+ cached_strings.push_back(std::move(str));
+ }
+ }
+
+ const std::string& generate(size_t i) { return cached_strings[i]; }
+};
+
+std::vector<std::string> LongStringGenerator::cached_strings;
+[[maybe_unused]] auto dummy = [] { // Pre-generate 32K strings
+ LongStringGenerator::ensure_strings(1 << 15);
+ return 0;
+}();
+
+template <class Gen>
+static void BM_UnorderedSet_Find_EmptySet(benchmark::State& state, Gen g) {
+ const size_t lookup_count = state.range(0);
+ std::unordered_set<std::string> s; // Empty set
+
+ for (auto _ : state) {
+ for (size_t i = 0; i < lookup_count; i++) {
+ benchmark::DoNotOptimize(s.find(g.generate(i)));
+ }
+ }
+}
+
+template <class Gen>
+static void BM_UnorderedSet_Find(benchmark::State& state, Gen g) {
+ const size_t lookup_count = state.range(0);
+ std::unordered_set<std::string> s{"hello"}; // Non-empty set
+
+ for (auto _ : state) {
+ for (size_t i = 0; i < lookup_count; i++) {
+ benchmark::DoNotOptimize(s.find(g.generate(i)));
+ }
+ }
+}
+
+BENCHMARK_CAPTURE(BM_UnorderedSet_Find_EmptySet, long_string, LongStringGenerator())
+ ->RangeMultiplier(2)
+ ->Range(1 << 10, 1 << 15); // Test from 1K to 32K lookups
+
+BENCHMARK_CAPTURE(BM_UnorderedSet_Find, long_string, LongStringGenerator())
+ ->RangeMultiplier(2)
+ ->Range(1 << 10, 1 << 15); // Test from 1K to 32K lookups
+
+BENCHMARK_MAIN();
>From 96ef4a128145d14e093f878258e5f49f4f7b573d Mon Sep 17 00:00:00 2001
From: yangxiaobing <yangxiaobing at jwzg.com>
Date: Fri, 14 Feb 2025 08:52:59 +0800
Subject: [PATCH 3/3] Fix clang-format issue.
---
.../benchmarks/containers/associative/hash_table_find.bench.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/test/benchmarks/containers/associative/hash_table_find.bench.cpp b/libcxx/test/benchmarks/containers/associative/hash_table_find.bench.cpp
index e6f0ae79caef3..93fddac02ee6f 100644
--- a/libcxx/test/benchmarks/containers/associative/hash_table_find.bench.cpp
+++ b/libcxx/test/benchmarks/containers/associative/hash_table_find.bench.cpp
@@ -50,7 +50,7 @@ static void BM_UnorderedSet_Find_EmptySet(benchmark::State& state, Gen g) {
template <class Gen>
static void BM_UnorderedSet_Find(benchmark::State& state, Gen g) {
const size_t lookup_count = state.range(0);
- std::unordered_set<std::string> s{"hello"}; // Non-empty set
+ std::unordered_set<std::string> s{"hello"}; // Non-empty set
for (auto _ : state) {
for (size_t i = 0; i < lookup_count; i++) {
More information about the libcxx-commits
mailing list