[libcxx-commits] [libcxx] 3efa461 - [libcxx] Avoid hash key in __hash_table::find() if it is empty. (#126837)

via libcxx-commits libcxx-commits at lists.llvm.org
Thu Jul 3 00:39:14 PDT 2025


Author: xbcnn
Date: 2025-07-03T09:39:06+02:00
New Revision: 3efa461d45a1867cf03d30bd4b6caf1ed2260475

URL: https://github.com/llvm/llvm-project/commit/3efa461d45a1867cf03d30bd4b6caf1ed2260475
DIFF: https://github.com/llvm/llvm-project/commit/3efa461d45a1867cf03d30bd4b6caf1ed2260475.diff

LOG: [libcxx] Avoid hash key in __hash_table::find() if it is empty. (#126837)

If the hash table is empty, with or without buckets, find() can return
early. Computing the hash of the key is then unnecessary, and skipping it
is worthwhile because hashing can be expensive for some key types, such
as long strings.

This is a small optimization, but it is useful in cases such as a
checklist (unordered_set/map) that is empty most of the time.

```
For std::unordered_set<*>, `--benchmark_filter=find`
1. With the opt:

---------------------------------------------------------------------------------------------------------
Benchmark                                                               Time             CPU   Iterations
---------------------------------------------------------------------------------------------------------
std::unordered_set<int>::find(key) (existent)/0                     0.118 ns        0.118 ns   5939922720
std::unordered_set<int>::find(key) (existent)/32                     52.1 ns         52.1 ns     13287232
std::unordered_set<int>::find(key) (existent)/1024                   51.1 ns         51.1 ns     13449472
std::unordered_set<int>::find(key) (existent)/8192                   53.1 ns         53.1 ns     13420864
std::unordered_set<int>::find(key) (non-existent)/0                  14.7 ns         14.7 ns     47725472
std::unordered_set<int>::find(key) (non-existent)/32                 44.1 ns         44.1 ns     15478144
std::unordered_set<int>::find(key) (non-existent)/1024               41.2 ns         41.2 ns     15082464
std::unordered_set<int>::find(key) (non-existent)/8192               49.5 ns         49.5 ns     15233600
std::unordered_set<std::string>::find(key) (existent)/0             0.136 ns        0.136 ns   5157977920
std::unordered_set<std::string>::find(key) (existent)/32              739 ns          739 ns      1023744
std::unordered_set<std::string>::find(key) (existent)/1024            836 ns          836 ns       840448
std::unordered_set<std::string>::find(key) (existent)/8192            768 ns          768 ns      1085664
std::unordered_set<std::string>::find(key) (non-existent)/0          14.6 ns         14.6 ns     47844160
std::unordered_set<std::string>::find(key) (non-existent)/32          608 ns          608 ns      1106496
std::unordered_set<std::string>::find(key) (non-existent)/1024        646 ns          646 ns       986272
std::unordered_set<std::string>::find(key) (non-existent)/8192        669 ns          669 ns      1047584


2. Without the opt:

---------------------------------------------------------------------------------------------------------
Benchmark                                                               Time             CPU   Iterations
---------------------------------------------------------------------------------------------------------
std::unordered_set<int>::find(key) (existent)/0                     0.135 ns        0.135 ns   5188502304
std::unordered_set<int>::find(key) (existent)/32                     54.4 ns         54.4 ns     12954144
std::unordered_set<int>::find(key) (existent)/1024                   57.7 ns         57.7 ns     13107008
std::unordered_set<int>::find(key) (existent)/8192                   50.7 ns         50.7 ns     12953312
std::unordered_set<int>::find(key) (non-existent)/0                  16.1 ns         16.1 ns     43460192
std::unordered_set<int>::find(key) (non-existent)/32                 45.8 ns         45.8 ns     17139584
std::unordered_set<int>::find(key) (non-existent)/1024               44.6 ns         44.6 ns     16538048
std::unordered_set<int>::find(key) (non-existent)/8192               41.5 ns         41.5 ns     12850816
std::unordered_set<std::string>::find(key) (existent)/0             0.133 ns        0.133 ns   5214104992
std::unordered_set<std::string>::find(key) (existent)/32              731 ns          731 ns      1000576
std::unordered_set<std::string>::find(key) (existent)/1024            716 ns          716 ns      1131584
std::unordered_set<std::string>::find(key) (existent)/8192            745 ns          745 ns       909632
std::unordered_set<std::string>::find(key) (non-existent)/0           600 ns          600 ns      1089792
std::unordered_set<std::string>::find(key) (non-existent)/32          645 ns          645 ns       979232
std::unordered_set<std::string>::find(key) (non-existent)/1024        675 ns          675 ns       962240
std::unordered_set<std::string>::find(key) (non-existent)/8192        711 ns          711 ns      1054880

```

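The improvement shows up when calling find() on an empty container with
non-existent `std::string` keys (random sizes from 1 to 1024). The first
line below is with the optimization, the second is without it: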
```
std::unordered_set<std::string>::find(key) (non-existent)/0          14.6 ns         14.6 ns     47844160
std::unordered_set<std::string>::find(key) (non-existent)/0           600 ns          600 ns      1089792
```

---------

Co-authored-by: yangxiaobing <yangxiaobing at jwzg.com>

Added: 
    

Modified: 
    libcxx/include/__hash_table
    libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
    libcxx/test/benchmarks/containers/associative/unordered_set.bench.cpp

Removed: 
    


################################################################################
diff  --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index cc5f6d1348e41..539ee33a2583e 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -1831,9 +1831,9 @@ template <class _Tp, class _Hash, class _Equal, class _Alloc>
 template <class _Key>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) {
-  size_t __hash  = hash_function()(__k);
   size_type __bc = bucket_count();
-  if (__bc != 0) {
+  if (__bc != 0 && size() != 0) {
+    size_t __hash       = hash_function()(__k);
     size_t __chash      = std::__constrain_hash(__hash, __bc);
     __next_pointer __nd = __bucket_list_[__chash];
     if (__nd != nullptr) {
@@ -1852,9 +1852,9 @@ template <class _Tp, class _Hash, class _Equal, class _Alloc>
 template <class _Key>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::const_iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) const {
-  size_t __hash  = hash_function()(__k);
   size_type __bc = bucket_count();
-  if (__bc != 0) {
+  if (__bc != 0 && size() != 0) {
+    size_t __hash       = hash_function()(__k);
     size_t __chash      = std::__constrain_hash(__hash, __bc);
     __next_pointer __nd = __bucket_list_[__chash];
     if (__nd != nullptr) {

diff  --git a/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h b/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
index 0e65f44fd483e..0ff7f15164d8a 100644
--- a/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
+++ b/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
@@ -59,7 +59,7 @@ void associative_container_benchmarks(std::string container) {
   auto get_key = [](Value const& v) { return adapt_operations<Container>::key_from_value(v); };
 
   auto bench = [&](std::string operation, auto f) {
-    benchmark::RegisterBenchmark(container + "::" + operation, f)->Arg(32)->Arg(1024)->Arg(8192);
+    benchmark::RegisterBenchmark(container + "::" + operation, f)->Arg(0)->Arg(32)->Arg(1024)->Arg(8192);
   };
 
   static constexpr bool is_multi_key_container =
@@ -176,7 +176,7 @@ void associative_container_benchmarks(std::string container) {
   // Insertion
   /////////////////////////
   bench("insert(value) (already present)", [=](auto& st) {
-    const std::size_t size = st.range(0);
+    const std::size_t size = st.range(0) ? st.range(0) : 1;
     std::vector<Value> in  = make_value_types(generate_unique_keys(size));
     Value to_insert        = in[in.size() / 2]; // pick any existing value
     std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
@@ -371,7 +371,7 @@ void associative_container_benchmarks(std::string container) {
   // Erasure
   /////////////////////////
   bench("erase(key) (existent)", [=](auto& st) {
-    const std::size_t size = st.range(0);
+    const std::size_t size = st.range(0) ? st.range(0) : 1; // avoid empty container
     std::vector<Value> in  = make_value_types(generate_unique_keys(size));
     Value element          = in[in.size() / 2]; // pick any element
     std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
@@ -415,7 +415,7 @@ void associative_container_benchmarks(std::string container) {
   });
 
   bench("erase(iterator)", [=](auto& st) {
-    const std::size_t size = st.range(0);
+    const std::size_t size = st.range(0) ? st.range(0) : 1; // avoid empty container
     std::vector<Value> in  = make_value_types(generate_unique_keys(size));
     Value element          = in[in.size() / 2]; // pick any element
 
@@ -494,7 +494,7 @@ void associative_container_benchmarks(std::string container) {
       Container c(in.begin(), in.end());
 
       while (st.KeepRunningBatch(BatchSize)) {
-        for (std::size_t i = 0; i != BatchSize; ++i) {
+        for (std::size_t i = 0; i != keys.size(); ++i) { // possible empty keys when Arg(0)
           auto result = func(c, keys[i]);
           benchmark::DoNotOptimize(c);
           benchmark::DoNotOptimize(result);

diff  --git a/libcxx/test/benchmarks/containers/associative/unordered_set.bench.cpp b/libcxx/test/benchmarks/containers/associative/unordered_set.bench.cpp
index 56420bdaadfbf..89443a597e85a 100644
--- a/libcxx/test/benchmarks/containers/associative/unordered_set.bench.cpp
+++ b/libcxx/test/benchmarks/containers/associative/unordered_set.bench.cpp
@@ -8,6 +8,7 @@
 
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 
+#include <string>
 #include <unordered_set>
 #include <utility>
 
@@ -27,6 +28,7 @@ struct support::adapt_operations<std::unordered_set<K>> {
 
 int main(int argc, char** argv) {
   support::associative_container_benchmarks<std::unordered_set<int>>("std::unordered_set<int>");
+  support::associative_container_benchmarks<std::unordered_set<std::string>>("std::unordered_set<std::string>");
 
   benchmark::Initialize(&argc, argv);
   benchmark::RunSpecifiedBenchmarks();


        


More information about the libcxx-commits mailing list