[libcxx-commits] [libcxx] 11f22f1 - [tzdb] Replace shared_mutex with mutex. (#87929)
via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Apr 13 00:16:15 PDT 2024
Author: Eric
Date: 2024-04-13T03:16:11-04:00
New Revision: 11f22f1a963ab3c8949cb723a63c07d7a409c8a8
URL: https://github.com/llvm/llvm-project/commit/11f22f1a963ab3c8949cb723a63c07d7a409c8a8
DIFF: https://github.com/llvm/llvm-project/commit/11f22f1a963ab3c8949cb723a63c07d7a409c8a8.diff
LOG: [tzdb] Replace shared_mutex with mutex. (#87929)
The overhead of taking a std::mutex is much lower than taking a reader
lock on a shared mutex, even under heavy contention.
The benefit of shared_mutex only occurs as the amount of
time spent in the critical sections grows large enough.
In our case all we do is read a pointer and release the lock.
As a result, using a shared lock can be ~50%-100% slower than using a plain std::mutex.
Here are the results for the provided benchmark on my machine:
```
2024-04-07T12:48:51-04:00
Running ./libcxx/benchmarks/shared_mutex_vs_mutex.libcxx.out
Run on (12 X 400 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x6)
L1 Instruction 32 KiB (x6)
L2 Unified 1024 KiB (x6)
L3 Unified 32768 KiB (x1)
Load Average: 2.70, 2.70, 1.63
---------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------
BM_shared_mutex/threads:1 13.9 ns 13.9 ns 50533700
BM_shared_mutex/threads:2 34.5 ns 68.9 ns 9957784
BM_shared_mutex/threads:4 38.4 ns 137 ns 4987772
BM_shared_mutex/threads:8 51.1 ns 358 ns 1974160
BM_shared_mutex/threads:32 57.1 ns 682 ns 1043648
BM_mutex/threads:1 5.54 ns 5.53 ns 125867422
BM_mutex/threads:2 15.5 ns 30.9 ns 21830116
BM_mutex/threads:4 15.4 ns 57.2 ns 12136920
BM_mutex/threads:8 19.3 ns 140 ns 4997080
BM_mutex/threads:32 20.8 ns 252 ns 2859808
```
Added:
libcxx/benchmarks/shared_mutex_vs_mutex.bench.cpp
Modified:
libcxx/benchmarks/CMakeLists.txt
libcxx/src/include/tzdb/tzdb_list_private.h
Removed:
################################################################################
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 928238c1ac69ba..527a2acf2d3b36 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -221,6 +221,7 @@ set(BENCHMARK_TESTS
map.bench.cpp
monotonic_buffer.bench.cpp
ordered_set.bench.cpp
+ shared_mutex_vs_mutex.bench.cpp
stop_token.bench.cpp
std_format_spec_string_unicode.bench.cpp
string.bench.cpp
diff --git a/libcxx/benchmarks/shared_mutex_vs_mutex.bench.cpp b/libcxx/benchmarks/shared_mutex_vs_mutex.bench.cpp
new file mode 100644
index 00000000000000..19d13b799cc137
--- /dev/null
+++ b/libcxx/benchmarks/shared_mutex_vs_mutex.bench.cpp
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This benchmark compares the performance of std::mutex and std::shared_mutex in contended scenarios.
+// It's meant to establish a baseline overhead for std::shared_mutex and std::mutex, and to help inform decisions about
+// which mutex to use when selecting a mutex type for a given use case.
+
+#include <atomic>
+#include <mutex>
+#include <numeric>
+#include <shared_mutex>
+#include <thread>
+
+#include "benchmark/benchmark.h"
+
+int global_value = 42;
+std::mutex m;
+std::shared_mutex sm;
+
+static void BM_shared_mutex(benchmark::State& state) {
+ for (auto _ : state) {
+ std::shared_lock<std::shared_mutex> lock(sm);
+ benchmark::DoNotOptimize(global_value);
+ }
+}
+
+static void BM_mutex(benchmark::State& state) {
+ for (auto _ : state) {
+ std::lock_guard<std::mutex> lock(m);
+ benchmark::DoNotOptimize(global_value);
+ }
+}
+
+BENCHMARK(BM_shared_mutex)->Threads(1)->Threads(2)->Threads(4)->Threads(8)->Threads(32);
+BENCHMARK(BM_mutex)->Threads(1)->Threads(2)->Threads(4)->Threads(8)->Threads(32);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/src/include/tzdb/tzdb_list_private.h b/libcxx/src/include/tzdb/tzdb_list_private.h
index 969b2b9f8a9f63..0be7b8e5a10058 100644
--- a/libcxx/src/include/tzdb/tzdb_list_private.h
+++ b/libcxx/src/include/tzdb/tzdb_list_private.h
@@ -15,8 +15,11 @@
#include <forward_list>
// When threads are not available the locking is not required.
+// When threads are available, we use std::mutex over std::shared_mutex
+// due to the increased overhead of std::shared_mutex.
+// See shared_mutex_vs_mutex.bench.cpp
#ifndef _LIBCPP_HAS_NO_THREADS
-# include <shared_mutex>
+# include <mutex>
#endif
#include "types_private.h"
@@ -56,7 +59,7 @@ class tzdb_list::__impl {
const tzdb& __front() const noexcept {
#ifndef _LIBCPP_HAS_NO_THREADS
- shared_lock __lock{__mutex_};
+ unique_lock __lock{__mutex_};
#endif
return __tzdb_.front();
}
@@ -72,7 +75,7 @@ class tzdb_list::__impl {
const_iterator __begin() const noexcept {
#ifndef _LIBCPP_HAS_NO_THREADS
- shared_lock __lock{__mutex_};
+ unique_lock __lock{__mutex_};
#endif
return __tzdb_.begin();
}
@@ -87,7 +90,7 @@ class tzdb_list::__impl {
void __load_no_lock() { chrono::__init_tzdb(__tzdb_.emplace_front(), __rules_.emplace_front()); }
#ifndef _LIBCPP_HAS_NO_THREADS
- mutable shared_mutex __mutex_;
+ mutable mutex __mutex_;
#endif
forward_list<tzdb> __tzdb_;
More information about the libcxx-commits
mailing list