[libcxx-commits] [libcxx] [libc++] Speed up classic locale (PR #70631)
via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Oct 30 00:42:59 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: Dmitry Vyukov (dvyukov)
<details>
<summary>Changes</summary>
Locale objects use atomic reference counting, which can be very expensive in parallel applications. The classic locale is used by default by all streams, so its reference count can be heavily contended. However, the classic locale is never destroyed, so reference counting it is completely pointless. Currently ~70% of the time in the parallel stringstream benchmarks is spent in the locale ctor/dtor, and execution slows down radically as more threads are added.
Avoid reference counting on the classic locale and inline the common ctors/dtor. With this change, locale ctor/dtor time becomes negligible and the benchmark starts to scale with the number of threads.
```
│baseline sec/op│ optimized sec/op │
Ostream_number/threads:1 184.5n ± 0% 133.0n ± 1% -27.91% (p=0.000 n=30)
Ostream_number/threads:72 24188.0n ± 3% 321.0n ± 2% -98.67% (p=0.000 n=30)
Istream_numbers/1024/threads:1 4.667µ ± 1% 4.273µ ± 0% -8.43% (p=0.000 n=30)
Istream_numbers/1024/threads:72 559.657µ ± 1% 9.350µ ± 1% -98.33% (p=0.000 n=30)
```
---
Full diff: https://github.com/llvm/llvm-project/pull/70631.diff
3 Files Affected:
- (modified) libcxx/benchmarks/stringstream.bench.cpp (+11-1)
- (modified) libcxx/include/__locale (+41-1)
- (modified) libcxx/src/locale.cpp (+41-50)
``````````diff
diff --git a/libcxx/benchmarks/stringstream.bench.cpp b/libcxx/benchmarks/stringstream.bench.cpp
index ea602557ccd770e..866c3efe2bbf58d 100644
--- a/libcxx/benchmarks/stringstream.bench.cpp
+++ b/libcxx/benchmarks/stringstream.bench.cpp
@@ -25,6 +25,16 @@ static void BM_Istream_numbers(benchmark::State& state) {
while (state.KeepRunning())
benchmark::DoNotOptimize(i += istream_numbers());
}
+BENCHMARK(BM_Istream_numbers)->RangeMultiplier(2)->Range(1024, 4096)->
+ UseRealTime()->Threads(1)->ThreadPerCpu();
+
+static void BM_Ostream_number(benchmark::State& state) {
+ while (state.KeepRunning()) {
+ std::ostringstream ss;
+ ss << 0;
+ benchmark::DoNotOptimize(ss.str().c_str());
+ }
+}
+BENCHMARK(BM_Ostream_number)->UseRealTime()->Threads(1)->ThreadPerCpu();
-BENCHMARK(BM_Istream_numbers)->RangeMultiplier(2)->Range(1024, 4096);
BENCHMARK_MAIN();
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index 90dcad3590c3d21..123c7000b9ff99e 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -126,9 +126,15 @@ public:
private:
class __imp;
__imp* __locale_;
+ static __imp* __classic_;
void __install_ctor(const locale&, facet*, long);
- static locale& __global();
+ static __imp*& __global();
+ static __imp* __make_global();
+ static __imp* __maybe_acquire(__imp* __i);
+ static __imp* __do_acquire(__imp* __i);
+ static void __maybe_release(__imp* __i);
+ static void __do_release(__imp* __i);
bool has_facet(id&) const;
const facet* use_facet(id&) const;
@@ -136,6 +142,40 @@ private:
template <class _Facet> friend const _Facet& use_facet(const locale&);
};
+inline locale::locale() _NOEXCEPT
+ : __locale_(__maybe_acquire(__global()))
+{
+}
+
+inline locale::locale(const locale& l) _NOEXCEPT
+ : __locale_(__maybe_acquire(l.__locale_))
+{
+}
+
+inline locale::~locale()
+{
+ __maybe_release(__locale_);
+}
+
+inline locale::__imp*& locale::__global()
+{
+ static __imp* __g = __make_global();
+ return __g;
+}
+
+inline locale::__imp* locale::__maybe_acquire(__imp* __i)
+{
+ if (__i != __classic_)
+ __do_acquire(__i);
+ return __i;
+}
+
+inline void locale::__maybe_release(__imp* __i)
+{
+ if (__i != __classic_)
+ __do_release(__i);
+}
+
class _LIBCPP_EXPORTED_FROM_ABI locale::facet
: public __shared_count
{
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 317b4dec7d241e5..a5d883718aea3b0 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -154,7 +154,6 @@ class _LIBCPP_HIDDEN locale::__imp
const locale::facet* use_facet(long id) const;
static const locale& make_classic();
- static locale& make_global();
private:
void install(facet* f, long id);
template <class F> void install(F* f) {install(f, f->id.__get());}
@@ -537,6 +536,8 @@ locale::__imp::use_facet(long id) const
// locale
+locale::__imp* locale::__classic_;
+
const locale&
locale::__imp::make_classic()
{
@@ -544,6 +545,16 @@ locale::__imp::make_classic()
static aligned_storage<sizeof(locale)>::type buf;
locale* c = reinterpret_cast<locale*>(&buf);
c->__locale_ = &make<__imp>(1u);
+ // We don't do reference counting on the classic locale.
+ // It's never destroyed anyway, but atomic reference counting may be very
+ // expensive in parallel applications. The classic locale is used by default
+ // in all streams. Note: if a new global locale is installed, then we lose
+ // the benefit of no reference counting. Potentially we can omit reference
+ // counting on all locales that are ever installed as global (leak them).
+ // Programs are not expected to install unbounded number of unique global
+ // locales, and global locale cannot be installed if any threads are running
+ // so real programs shouldn't install them at all.
+ c->__classic_ = c->__locale_;
return *c;
}
@@ -554,78 +565,58 @@ locale::classic()
return c;
}
-locale&
-locale::__imp::make_global()
+locale::__imp*
+locale::__make_global()
{
// only one thread can get in here and it only gets in once
- static aligned_storage<sizeof(locale)>::type buf;
- auto *obj = ::new (&buf) locale(locale::classic());
- return *obj;
-}
-
-locale&
-locale::__global()
-{
- static locale& g = __imp::make_global();
- return g;
-}
-
-locale::locale() noexcept
- : __locale_(__global().__locale_)
-{
- __locale_->__add_shared();
-}
-
-locale::locale(const locale& l) noexcept
- : __locale_(l.__locale_)
-{
- __locale_->__add_shared();
-}
-
-locale::~locale()
-{
- __locale_->__release_shared();
+ return classic().__locale_;
}
const locale&
locale::operator=(const locale& other) noexcept
{
- other.__locale_->__add_shared();
- __locale_->__release_shared();
+ __maybe_acquire(other.__locale_);
+ __maybe_release(__locale_);
__locale_ = other.__locale_;
return *this;
}
locale::locale(const char* name)
- : __locale_(name ? new __imp(name)
- : (__throw_runtime_error("locale constructed with null"), nullptr))
+ : __locale_(__do_acquire(name ? new __imp(name)
+ : (__throw_runtime_error("locale constructed with null"), nullptr)))
{
- __locale_->__add_shared();
}
locale::locale(const string& name)
- : __locale_(new __imp(name))
+ : __locale_(__do_acquire(new __imp(name)))
{
- __locale_->__add_shared();
}
locale::locale(const locale& other, const char* name, category c)
- : __locale_(name ? new __imp(*other.__locale_, name, c)
- : (__throw_runtime_error("locale constructed with null"), nullptr))
+ : __locale_(__do_acquire(name ? new __imp(*other.__locale_, name, c)
+ : (__throw_runtime_error("locale constructed with null"), nullptr)))
{
- __locale_->__add_shared();
}
locale::locale(const locale& other, const string& name, category c)
- : __locale_(new __imp(*other.__locale_, name, c))
+ : __locale_(__do_acquire(new __imp(*other.__locale_, name, c)))
{
- __locale_->__add_shared();
}
locale::locale(const locale& other, const locale& one, category c)
- : __locale_(new __imp(*other.__locale_, *one.__locale_, c))
+ : __locale_(__do_acquire(new __imp(*other.__locale_, *one.__locale_, c)))
+{
+}
+
+locale::__imp* locale::__do_acquire(__imp* __i)
+{
+ __i->__add_shared();
+ return __i;
+}
+
+void locale::__do_release(__imp* __i)
{
- __locale_->__add_shared();
+ __i->__release_shared();
}
string
@@ -641,17 +632,17 @@ locale::__install_ctor(const locale& other, facet* f, long id)
__locale_ = new __imp(*other.__locale_, f, id);
else
__locale_ = other.__locale_;
- __locale_->__add_shared();
+ __maybe_acquire(__locale_);
}
locale
locale::global(const locale& loc)
{
- locale& g = __global();
- locale r = g;
- g = loc;
- if (g.name() != "*")
- setlocale(LC_ALL, g.name().c_str());
+ __imp*& g = __global();
+ locale r = loc;
+ swap(g, r.__locale_);
+ if (g->name() != "*")
+ setlocale(LC_ALL, g->name().c_str());
return r;
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/70631
More information about the libcxx-commits
mailing list