[libcxx-commits] [libcxx] [libc++] Speed up classic locale (PR #72112)

Louis Dionne via libcxx-commits libcxx-commits at lists.llvm.org
Thu Nov 23 14:22:18 PST 2023


https://github.com/ldionne updated https://github.com/llvm/llvm-project/pull/72112

>From e45ddc0e249b0b4f8987fd0b3d6d15e130df383b Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov at google.com>
Date: Mon, 13 Nov 2023 12:29:22 +0100
Subject: [PATCH 01/10] [libc++] Speed up classic locale
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Locale objects use atomic reference counting, which may be very expensive
in parallel applications. The classic locale is used by default by all
streams and can be very contended. But it's never destroyed, so the reference
counting is also completely pointless on the classic locale. Currently ~70%
of time in the parallel stringstream benchmarks is spent in locale ctor/dtor.
And the execution radically slows down with more threads.

Avoid reference counting on the classic locale. With this change parallel
benchmarks start to scale with threads.

                              │   baseline   │    optimized                            │
                              │    sec/op    │    sec/op      vs base                  │
Istream_numbers/0/threads:1      4.672µ ± 0%   4.419µ ± 0%     -5.42% (p=0.000 n=30+39)
Istream_numbers/0/threads:72   539.817µ ± 0%   9.842µ ± 1%    -98.18% (p=0.000 n=30+40)
Istream_numbers/1/threads:1      4.890µ ± 0%   4.750µ ± 0%     -2.85% (p=0.000 n=30+40)
Istream_numbers/1/threads:72     66.44µ ± 1%   10.14µ ± 1%    -84.74% (p=0.000 n=30+40)
Istream_numbers/2/threads:1      4.888µ ± 0%   4.746µ ± 0%     -2.92% (p=0.000 n=30+40)
Istream_numbers/2/threads:72     494.8µ ± 0%   410.2µ ± 1%    -17.11% (p=0.000 n=30+40)
Istream_numbers/3/threads:1      4.697µ ± 0%   4.695µ ± 5%          ~ (p=0.391 n=30+37)
Istream_numbers/3/threads:72     421.5µ ± 7%   421.9µ ± 9%          ~ (p=0.665 n=30)
Ostream_number/0/threads:1       183.0n ± 0%   141.0n ± 2%    -22.95% (p=0.000 n=30)
Ostream_number/0/threads:72    24196.5n ± 1%   343.5n ± 3%    -98.58% (p=0.000 n=30)
Ostream_number/1/threads:1       250.0n ± 0%   196.0n ± 2%    -21.60% (p=0.000 n=30)
Ostream_number/1/threads:72    16260.5n ± 0%   407.0n ± 2%    -97.50% (p=0.000 n=30)
Ostream_number/2/threads:1       254.0n ± 0%   196.0n ± 1%    -22.83% (p=0.000 n=30)
Ostream_number/2/threads:72      28.49µ ± 1%   18.89µ ± 5%    -33.72% (p=0.000 n=30)
Ostream_number/3/threads:1       185.0n ± 0%   185.0n ± 0%      0.00% (p=0.017 n=30)
Ostream_number/3/threads:72      19.38µ ± 4%   19.33µ ± 5%          ~ (p=0.425 n=30)
---
 libcxx/benchmarks/stringstream.bench.cpp | 62 ++++++++++++++++++++++--
 libcxx/src/locale.cpp                    | 47 ++++++++++++------
 2 files changed, 92 insertions(+), 17 deletions(-)

diff --git a/libcxx/benchmarks/stringstream.bench.cpp b/libcxx/benchmarks/stringstream.bench.cpp
index ea602557ccd770e..c10ee3a8cc5b83c 100644
--- a/libcxx/benchmarks/stringstream.bench.cpp
+++ b/libcxx/benchmarks/stringstream.bench.cpp
@@ -1,11 +1,12 @@
 #include "benchmark/benchmark.h"
 #include "test_macros.h"
 
+#include <mutex>
 #include <sstream>
 
 TEST_NOINLINE double istream_numbers();
 
-double istream_numbers() {
+double istream_numbers(std::locale* l) {
   const char* a[] = {"-6  69 -71  2.4882e-02 -100 101 -2.00005 5000000 -50000000",
                      "-25 71   7 -9.3262e+01 -100 101 -2.00005 5000000 -50000000",
                      "-14 53  46 -6.7026e-02 -100 101 -2.00005 5000000 -50000000"};
@@ -14,17 +15,72 @@ double istream_numbers() {
   double f1 = 0.0, f2 = 0.0, q = 0.0;
   for (int i = 0; i < 3; i++) {
     std::istringstream s(a[i]);
+    if (l)
+      s.imbue(*l);
     s >> a1 >> a2 >> a3 >> f1 >> a4 >> a5 >> f2 >> a6 >> a7;
     q += (a1 + a2 + a3 + a4 + a5 + a6 + a7 + f1 + f2) / 1000000;
   }
   return q;
 }
 
+struct LocaleSelector {
+  std::locale* imbue;
+  std::locale old;
+
+  LocaleSelector(benchmark::State& state) {
+    static std::mutex mu;
+    std::lock_guard l(mu);
+    switch (state.range(0)) {
+    case 0: {
+      old   = std::locale::global(std::locale::classic());
+      imbue = nullptr;
+      break;
+    }
+    case 1: {
+      old = std::locale::global(std::locale::classic());
+      thread_local std::locale l("en_US.UTF-8");
+      imbue = &l;
+      break;
+    }
+    case 2: {
+      old = std::locale::global(std::locale::classic());
+      static std::locale l("en_US.UTF-8");
+      imbue = &l;
+      break;
+    }
+    case 3: {
+      old   = std::locale::global(std::locale("en_US.UTF-8"));
+      imbue = nullptr;
+      break;
+    }
+    }
+  }
+
+  ~LocaleSelector() {
+    static std::mutex mu;
+    std::lock_guard l(mu);
+    std::locale::global(old);
+  }
+};
+
 static void BM_Istream_numbers(benchmark::State& state) {
+  LocaleSelector sel(state);
   double i = 0;
   while (state.KeepRunning())
-    benchmark::DoNotOptimize(i += istream_numbers());
+    benchmark::DoNotOptimize(i += istream_numbers(sel.imbue));
+}
+BENCHMARK(BM_Istream_numbers)->DenseRange(0, 3)->UseRealTime()->Threads(1)->ThreadPerCpu();
+
+static void BM_Ostream_number(benchmark::State& state) {
+  LocaleSelector sel(state);
+  while (state.KeepRunning()) {
+    std::ostringstream ss;
+    if (sel.imbue)
+      ss.imbue(*sel.imbue);
+    ss << 0;
+    benchmark::DoNotOptimize(ss.str().c_str());
+  }
 }
+BENCHMARK(BM_Ostream_number)->DenseRange(0, 3)->UseRealTime()->Threads(1)->ThreadPerCpu();
 
-BENCHMARK(BM_Istream_numbers)->RangeMultiplier(2)->Range(1024, 4096);
 BENCHMARK_MAIN();
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index f3b281d98825e44..64e202677dd404c 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include <algorithm>
+#include <atomic>
 #include <clocale>
 #include <codecvt>
 #include <cstddef>
@@ -81,7 +82,7 @@ locale_t __cloc() {
 
 namespace {
 
-struct release
+struct releaser
 {
     void operator()(locale::facet* p) {p->__release_shared();}
 };
@@ -155,10 +156,13 @@ class _LIBCPP_HIDDEN locale::__imp
         {return static_cast<size_t>(id) < facets_.size() && facets_[static_cast<size_t>(id)];}
     const locale::facet* use_facet(long id) const;
 
+    void acquire();
+    void release();
 private:
     void install(facet* f, long id);
     template <class F> void install(F* f) {install(f, f->id.__get());}
     template <class F> void install_from(const __imp& other);
+    static std::atomic<__imp*> classic_;
 };
 
 locale::__imp::__imp(size_t refs)
@@ -500,7 +504,7 @@ locale::__imp::__imp(const __imp& other, facet* f, long id)
       name_("*")
 {
     f->__add_shared();
-    unique_ptr<facet, release> hold(f);
+    unique_ptr<facet, releaser> hold(f);
     facets_ = other.facets_;
     for (unsigned i = 0; i < other.facets_.size(); ++i)
         if (facets_[i])
@@ -519,7 +523,7 @@ void
 locale::__imp::install(facet* f, long id)
 {
     f->__add_shared();
-    unique_ptr<facet, release> hold(f);
+    unique_ptr<facet, releaser> hold(f);
     if (static_cast<size_t>(id) >= facets_.size())
         facets_.resize(static_cast<size_t>(id+1));
     if (facets_[static_cast<size_t>(id)])
@@ -554,38 +558,53 @@ struct __no_destroy {
     alignas(T) byte buf[sizeof(T)];
 };
 
+std::atomic<locale::__imp*> locale::__imp::classic_;
+
 const locale& locale::classic() {
     static const __no_destroy<locale> c(__private_tag{}, &make<__imp>(1u));
+    // TODO:
+    classic_.store(c->__locale_, std::memory_order_relaxed);
     return c.get();
 }
 
 locale& locale::__global() {
     static __no_destroy<locale> g(locale::classic());
     return g.get();
+
+void locale::__imp::acquire()
+{
+    if (this != classic_.load(std::memory_order_relaxed))
+        __add_shared();
+}
+
+void locale::__imp::release()
+{
+    if (this != classic_.load(std::memory_order_relaxed))
+        __release_shared();
 }
 
 locale::locale() noexcept
     : __locale_(__global().__locale_)
 {
-    __locale_->__add_shared();
+    __locale_->acquire();
 }
 
 locale::locale(const locale& l) noexcept
     : __locale_(l.__locale_)
 {
-    __locale_->__add_shared();
+    __locale_->acquire();
 }
 
 locale::~locale()
 {
-    __locale_->__release_shared();
+    __locale_->release();
 }
 
 const locale&
 locale::operator=(const locale& other) noexcept
 {
-    other.__locale_->__add_shared();
-    __locale_->__release_shared();
+    other.__locale_->acquire();
+    __locale_->release();
     __locale_ = other.__locale_;
     return *this;
 }
@@ -594,32 +613,32 @@ locale::locale(const char* name)
     : __locale_(name ? new __imp(name)
                      : (__throw_runtime_error("locale constructed with null"), nullptr))
 {
-    __locale_->__add_shared();
+    __locale_->acquire();
 }
 
 locale::locale(const string& name)
     : __locale_(new __imp(name))
 {
-    __locale_->__add_shared();
+    __locale_->acquire();
 }
 
 locale::locale(const locale& other, const char* name, category c)
     : __locale_(name ? new __imp(*other.__locale_, name, c)
                      : (__throw_runtime_error("locale constructed with null"), nullptr))
 {
-    __locale_->__add_shared();
+    __locale_->acquire();
 }
 
 locale::locale(const locale& other, const string& name, category c)
     : __locale_(new __imp(*other.__locale_, name, c))
 {
-    __locale_->__add_shared();
+    __locale_->acquire();
 }
 
 locale::locale(const locale& other, const locale& one, category c)
     : __locale_(new __imp(*other.__locale_, *one.__locale_, c))
 {
-    __locale_->__add_shared();
+    __locale_->acquire();
 }
 
 string
@@ -635,7 +654,7 @@ locale::__install_ctor(const locale& other, facet* f, long id)
         __locale_ = new __imp(*other.__locale_, f, id);
     else
         __locale_ = other.__locale_;
-    __locale_->__add_shared();
+    __locale_->acquire();
 }
 
 locale

>From 8199841d8af2424f50c25f82caf18b968413170c Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov at google.com>
Date: Thu, 16 Nov 2023 15:02:16 +0100
Subject: [PATCH 02/10] add more comments

---
 libcxx/src/locale.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 64e202677dd404c..998fe12eef4a6b6 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -162,6 +162,12 @@ class _LIBCPP_HIDDEN locale::__imp
     void install(facet* f, long id);
     template <class F> void install(F* f) {install(f, f->id.__get());}
     template <class F> void install_from(const __imp& other);
+
+    // We don't do reference counting on the classic locale.
+    // It's never destroyed anyway, but atomic reference counting may be very
+    // expensive in parallel applications. The classic locale is used by default
+    // in all streams. Note: if a new global locale is installed, then we lose
+    // the benefit of no reference counting.
     static std::atomic<__imp*> classic_;
 };
 
@@ -563,6 +569,15 @@ std::atomic<locale::__imp*> locale::__imp::classic_;
 const locale& locale::classic() {
     static const __no_destroy<locale> c(__private_tag{}, &make<__imp>(1u));
     // TODO:
+    // We use relaxed memory ordering because readers don't access
+    // the contents of the objects, they are interested in just the
+    // pointer value.
+    // If a locale uses the classic imp, then this store happens
+    // before acquire/release methods, and they must observe the
+    // right value and omit reference counting.
+    // If a locale uses a non-classic imp, then it does not matter
+    // what value it will load, the result of the comparison will
+    // be false in all cases.
     classic_.store(c->__locale_, std::memory_order_relaxed);
     return c.get();
 }

>From 905907b69098f116da4eb685599abe1c7c89c22b Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Thu, 23 Nov 2023 10:35:36 -0500
Subject: [PATCH 03/10] Adjust after rebase

---
 libcxx/src/locale.cpp | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 998fe12eef4a6b6..946b85b45aef43d 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -567,18 +567,20 @@ struct __no_destroy {
 std::atomic<locale::__imp*> locale::__imp::classic_;
 
 const locale& locale::classic() {
-    static const __no_destroy<locale> c(__private_tag{}, &make<__imp>(1u));
-    // TODO:
-    // We use relaxed memory ordering because readers don't access
-    // the contents of the objects, they are interested in just the
-    // pointer value.
-    // If a locale uses the classic imp, then this store happens
-    // before acquire/release methods, and they must observe the
-    // right value and omit reference counting.
-    // If a locale uses a non-classic imp, then it does not matter
-    // what value it will load, the result of the comparison will
-    // be false in all cases.
-    classic_.store(c->__locale_, std::memory_order_relaxed);
+    static const __no_destroy<locale> c(__private_tag{}, []{
+        __imp* ptr = &make<__imp>(1u);
+        // We use relaxed memory ordering because readers don't access
+        // the contents of the objects, they are interested in just the
+        // pointer value.
+        // If a locale uses the classic imp, then this store happens
+        // before acquire/release methods, and they must observe the
+        // right value and omit reference counting.
+        // If a locale uses a non-classic imp, then it does not matter
+        // what value it will load, the result of the comparison will
+        // be false in all cases.
+        classic_.store(ptr, std::memory_order_relaxed);
+        return ptr;
+    }());
     return c.get();
 }
 

>From 3a4039ddbefe6987c9c5b0ed7b2dd3fee5486f1a Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Thu, 23 Nov 2023 11:10:28 -0500
Subject: [PATCH 04/10] Implement suggestion without needing atomics

---
 libcxx/include/CMakeLists.txt         |  1 +
 libcxx/include/__locale               |  1 +
 libcxx/include/__utility/no_destroy.h | 54 +++++++++++++++++++
 libcxx/src/locale.cpp                 | 75 +++++++++------------------
 4 files changed, 80 insertions(+), 51 deletions(-)
 create mode 100644 libcxx/include/__utility/no_destroy.h

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 889d7fedbf2965f..521c586c0e07957 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -850,6 +850,7 @@ set(files
   __utility/integer_sequence.h
   __utility/is_pointer_in_range.h
   __utility/move.h
+  __utility/no_destroy.h
   __utility/pair.h
   __utility/piecewise_construct.h
   __utility/priority_tag.h
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index fcae49e23fac59e..dbb9a1f1c81b3b0 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -15,6 +15,7 @@
 #include <__memory/shared_ptr.h> // __shared_count
 #include <__mutex/once_flag.h>
 #include <__type_traits/make_unsigned.h>
+#include <__utility/no_destroy.h>
 #include <cctype>
 #include <clocale>
 #include <cstdint>
diff --git a/libcxx/include/__utility/no_destroy.h b/libcxx/include/__utility/no_destroy.h
new file mode 100644
index 000000000000000..6611401760e466c
--- /dev/null
+++ b/libcxx/include/__utility/no_destroy.h
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___UTILITY_NO_DESTROY_H
+#define _LIBCPP___UTILITY_NO_DESTROY_H
+
+#include <__config>
+#include <__utility/forward.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+struct __uninitialized_tag {};
+struct __zero_initialized_tag {};
+
+// This class stores an object of type T but never destroys it.
+//
+// This is akin to using __attribute__((no_destroy)), except that it is possible
+// to control the lifetime of the object with more flexibility by deciding e.g.
+// whether to initialize the object at construction or to defer to a later
+// initialization using __emplace.
+template <class _Tp>
+struct __no_destroy {
+  _LIBCPP_HIDE_FROM_ABI explicit __no_destroy(__uninitialized_tag) {}
+  _LIBCPP_HIDE_FROM_ABI explicit constexpr __no_destroy(__zero_initialized_tag) : __buf{} {}
+
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI explicit __no_destroy(_Args&&... __args) {
+    new (&__buf) _Tp(std::forward<_Args>(__args)...);
+  }
+
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI _Tp& __emplace(_Args&&... __args) {
+    return *new (&__buf) _Tp(std::forward<_Args>(__args)...);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI _Tp& __get() { return *reinterpret_cast<_Tp*>(&__buf); }
+  _LIBCPP_HIDE_FROM_ABI _Tp const& __get() const { return *reinterpret_cast<_Tp const*>(&__buf); }
+
+private:
+  _ALIGNAS_TYPE(_Tp) char __buf[sizeof(_Tp)];
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___UTILITY_NO_DESTROY_H
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 946b85b45aef43d..3af08ddf5ee6c30 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <__utility/no_destroy.h>
 #include <algorithm>
-#include <atomic>
 #include <clocale>
 #include <codecvt>
 #include <cstddef>
@@ -158,17 +158,12 @@ class _LIBCPP_HIDDEN locale::__imp
 
     void acquire();
     void release();
+    static __no_destroy<__imp> classic_locale_imp_;
+
 private:
     void install(facet* f, long id);
     template <class F> void install(F* f) {install(f, f->id.__get());}
     template <class F> void install_from(const __imp& other);
-
-    // We don't do reference counting on the classic locale.
-    // It's never destroyed anyway, but atomic reference counting may be very
-    // expensive in parallel applications. The classic locale is used by default
-    // in all streams. Note: if a new global locale is installed, then we lose
-    // the benefit of no reference counting.
-    static std::atomic<__imp*> classic_;
 };
 
 locale::__imp::__imp(size_t refs)
@@ -547,57 +542,35 @@ locale::__imp::use_facet(long id) const
 
 // locale
 
-// This class basically implements __attribute__((no_destroy)), which isn't supported
-// by GCC as of writing this.
-template <class T>
-struct __no_destroy {
-    template <class... Args>
-    explicit __no_destroy(Args&&... args) {
-        T* obj = reinterpret_cast<T*>(&buf);
-        new (obj) T(std::forward<Args>(args)...);
-    }
-
-    T& get() { return *reinterpret_cast<T*>(&buf); }
-    T const& get() const { return *reinterpret_cast<T const*>(&buf); }
-
-  private:
-    alignas(T) byte buf[sizeof(T)];
-};
-
-std::atomic<locale::__imp*> locale::__imp::classic_;
+// We don't do reference counting on the classic locale.
+// It's never destroyed anyway, but atomic reference counting may be very
+// expensive in parallel applications. The classic locale is used by default
+// in all streams. Note: if a new global locale is installed, then we lose
+// the benefit of no reference counting.
+__no_destroy<locale::__imp> locale::__imp::classic_locale_imp_(__uninitialized_tag{}); // initialized below in classic()
 
 const locale& locale::classic() {
-    static const __no_destroy<locale> c(__private_tag{}, []{
-        __imp* ptr = &make<__imp>(1u);
-        // We use relaxed memory ordering because readers don't access
-        // the contents of the objects, they are interested in just the
-        // pointer value.
-        // If a locale uses the classic imp, then this store happens
-        // before acquire/release methods, and they must observe the
-        // right value and omit reference counting.
-        // If a locale uses a non-classic imp, then it does not matter
-        // what value it will load, the result of the comparison will
-        // be false in all cases.
-        classic_.store(ptr, std::memory_order_relaxed);
-        return ptr;
-    }());
-    return c.get();
+  static const __no_destroy<locale> classic_locale(__private_tag{}, [] {
+    // executed exactly once on first initialization of `classic_locale`
+    locale::__imp::classic_locale_imp_.__emplace(1u);
+    return &locale::__imp::classic_locale_imp_.__get();
+  }());
+  return classic_locale.__get();
 }
 
 locale& locale::__global() {
-    static __no_destroy<locale> g(locale::classic());
-    return g.get();
+  static __no_destroy<locale> g(locale::classic());
+  return g.__get();
+}
 
-void locale::__imp::acquire()
-{
-    if (this != classic_.load(std::memory_order_relaxed))
-        __add_shared();
+void locale::__imp::acquire() {
+  if (this != &locale::__imp::classic_locale_imp_.__get())
+    __add_shared();
 }
 
-void locale::__imp::release()
-{
-    if (this != classic_.load(std::memory_order_relaxed))
-        __release_shared();
+void locale::__imp::release() {
+  if (this != &locale::__imp::classic_locale_imp_.__get())
+    __release_shared();
 }
 
 locale::locale() noexcept

>From 8e64a9818d32ad87c328d897a502ac10d2beded7 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Thu, 23 Nov 2023 14:02:42 -0500
Subject: [PATCH 05/10] Fix c++03 issue

---
 libcxx/include/__utility/no_destroy.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libcxx/include/__utility/no_destroy.h b/libcxx/include/__utility/no_destroy.h
index 6611401760e466c..a10f7e6b520cc2b 100644
--- a/libcxx/include/__utility/no_destroy.h
+++ b/libcxx/include/__utility/no_destroy.h
@@ -19,7 +19,6 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 struct __uninitialized_tag {};
-struct __zero_initialized_tag {};
 
 // This class stores an object of type T but never destroys it.
 //
@@ -30,7 +29,6 @@ struct __zero_initialized_tag {};
 template <class _Tp>
 struct __no_destroy {
   _LIBCPP_HIDE_FROM_ABI explicit __no_destroy(__uninitialized_tag) {}
-  _LIBCPP_HIDE_FROM_ABI explicit constexpr __no_destroy(__zero_initialized_tag) : __buf{} {}
 
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI explicit __no_destroy(_Args&&... __args) {

>From ecce059ac8cf4869bf4034de930bdead6273e79d Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Thu, 23 Nov 2023 14:36:19 -0500
Subject: [PATCH 06/10] Use union to avoid type punning

---
 libcxx/include/__utility/no_destroy.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/libcxx/include/__utility/no_destroy.h b/libcxx/include/__utility/no_destroy.h
index a10f7e6b520cc2b..b15469219fca73f 100644
--- a/libcxx/include/__utility/no_destroy.h
+++ b/libcxx/include/__utility/no_destroy.h
@@ -29,22 +29,26 @@ struct __uninitialized_tag {};
 template <class _Tp>
 struct __no_destroy {
   _LIBCPP_HIDE_FROM_ABI explicit __no_destroy(__uninitialized_tag) {}
+  _LIBCPP_HIDE_FROM_ABI ~__no_destroy() {
+    // nothing
+  }
 
   template <class... _Args>
-  _LIBCPP_HIDE_FROM_ABI explicit __no_destroy(_Args&&... __args) {
-    new (&__buf) _Tp(std::forward<_Args>(__args)...);
-  }
+  _LIBCPP_HIDE_FROM_ABI explicit __no_destroy(_Args&&... __args) : __obj_(std::forward<_Args>(__args)...) {}
 
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI _Tp& __emplace(_Args&&... __args) {
-    return *new (&__buf) _Tp(std::forward<_Args>(__args)...);
+    new (&__obj_) _Tp(std::forward<_Args>(__args)...);
+    return __obj_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI _Tp& __get() { return *reinterpret_cast<_Tp*>(&__buf); }
-  _LIBCPP_HIDE_FROM_ABI _Tp const& __get() const { return *reinterpret_cast<_Tp const*>(&__buf); }
+  _LIBCPP_HIDE_FROM_ABI _Tp& __get() { return __obj_; }
+  _LIBCPP_HIDE_FROM_ABI _Tp const& __get() const { return __obj_; }
 
 private:
-  _ALIGNAS_TYPE(_Tp) char __buf[sizeof(_Tp)];
+  union {
+    _Tp __obj_;
+  };
 };
 
 _LIBCPP_END_NAMESPACE_STD

>From fd1d4ec1b9408580bdc4c4763ebccef808fabbb5 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Thu, 23 Nov 2023 17:07:00 -0500
Subject: [PATCH 07/10] Apply formatting changes

---
 libcxx/src/locale.cpp | 46 ++++++++++++++-----------------------------
 1 file changed, 15 insertions(+), 31 deletions(-)

diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 3af08ddf5ee6c30..2cb75f81b36da33 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -82,9 +82,8 @@ locale_t __cloc() {
 
 namespace {
 
-struct releaser
-{
-    void operator()(locale::facet* p) {p->__release_shared();}
+struct releaser {
+  void operator()(locale::facet* p) { p->__release_shared(); }
 };
 
 template <class T, class ...Args>
@@ -160,7 +159,7 @@ class _LIBCPP_HIDDEN locale::__imp
     void release();
     static __no_destroy<__imp> classic_locale_imp_;
 
-private:
+  private:
     void install(facet* f, long id);
     template <class F> void install(F* f) {install(f, f->id.__get());}
     template <class F> void install_from(const __imp& other);
@@ -573,62 +572,47 @@ void locale::__imp::release() {
     __release_shared();
 }
 
-locale::locale() noexcept
-    : __locale_(__global().__locale_)
-{
-    __locale_->acquire();
-}
+locale::locale() noexcept : __locale_(__global().__locale_) { __locale_->acquire(); }
 
-locale::locale(const locale& l) noexcept
-    : __locale_(l.__locale_)
-{
-    __locale_->acquire();
-}
+locale::locale(const locale& l) noexcept : __locale_(l.__locale_) { __locale_->acquire(); }
 
-locale::~locale()
-{
-    __locale_->release();
-}
+locale::~locale() { __locale_->release(); }
 
 const locale&
 locale::operator=(const locale& other) noexcept
 {
-    other.__locale_->acquire();
-    __locale_->release();
-    __locale_ = other.__locale_;
-    return *this;
+  other.__locale_->acquire();
+  __locale_->release();
+  __locale_ = other.__locale_;
+  return *this;
 }
 
 locale::locale(const char* name)
     : __locale_(name ? new __imp(name)
                      : (__throw_runtime_error("locale constructed with null"), nullptr))
 {
-    __locale_->acquire();
+  __locale_->acquire();
 }
 
-locale::locale(const string& name)
-    : __locale_(new __imp(name))
-{
-    __locale_->acquire();
-}
+locale::locale(const string& name) : __locale_(new __imp(name)) { __locale_->acquire(); }
 
 locale::locale(const locale& other, const char* name, category c)
     : __locale_(name ? new __imp(*other.__locale_, name, c)
                      : (__throw_runtime_error("locale constructed with null"), nullptr))
 {
-    __locale_->acquire();
+  __locale_->acquire();
 }
 
 locale::locale(const locale& other, const string& name, category c)
     : __locale_(new __imp(*other.__locale_, name, c))
 {
-    __locale_->acquire();
+  __locale_->acquire();
 }
 
 locale::locale(const locale& other, const locale& one, category c)
     : __locale_(new __imp(*other.__locale_, *one.__locale_, c))
 {
-    __locale_->acquire();
+  __locale_->acquire();
 }
 
 string

>From 185b2b98b9b0e13ec5518c18f46b7f2b84c4d50b Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Thu, 23 Nov 2023 17:11:07 -0500
Subject: [PATCH 08/10] Use proper variable names in benchmark

---
 libcxx/benchmarks/stringstream.bench.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/libcxx/benchmarks/stringstream.bench.cpp b/libcxx/benchmarks/stringstream.bench.cpp
index c10ee3a8cc5b83c..93369e6d6402bba 100644
--- a/libcxx/benchmarks/stringstream.bench.cpp
+++ b/libcxx/benchmarks/stringstream.bench.cpp
@@ -6,7 +6,7 @@
 
 TEST_NOINLINE double istream_numbers();
 
-double istream_numbers(std::locale* l) {
+double istream_numbers(std::locale* loc) {
   const char* a[] = {"-6  69 -71  2.4882e-02 -100 101 -2.00005 5000000 -50000000",
                      "-25 71   7 -9.3262e+01 -100 101 -2.00005 5000000 -50000000",
                      "-14 53  46 -6.7026e-02 -100 101 -2.00005 5000000 -50000000"};
@@ -15,8 +15,8 @@ double istream_numbers(std::locale* l) {
   double f1 = 0.0, f2 = 0.0, q = 0.0;
   for (int i = 0; i < 3; i++) {
     std::istringstream s(a[i]);
-    if (l)
-      s.imbue(*l);
+    if (loc)
+      s.imbue(*loc);
     s >> a1 >> a2 >> a3 >> f1 >> a4 >> a5 >> f2 >> a6 >> a7;
     q += (a1 + a2 + a3 + a4 + a5 + a6 + a7 + f1 + f2) / 1000000;
   }
@@ -28,8 +28,8 @@ struct LocaleSelector {
   std::locale old;
 
   LocaleSelector(benchmark::State& state) {
-    static std::mutex mu;
-    std::lock_guard l(mu);
+    static std::mutex mutex;
+    std::lock_guard guard(mutex);
     switch (state.range(0)) {
     case 0: {
       old   = std::locale::global(std::locale::classic());
@@ -38,14 +38,14 @@ struct LocaleSelector {
     }
     case 1: {
       old = std::locale::global(std::locale::classic());
-      thread_local std::locale l("en_US.UTF-8");
-      imbue = &l;
+      thread_local std::locale loc("en_US.UTF-8");
+      imbue = &loc;
       break;
     }
     case 2: {
       old = std::locale::global(std::locale::classic());
-      static std::locale l("en_US.UTF-8");
-      imbue = &l;
+      static std::locale loc("en_US.UTF-8");
+      imbue = &loc;
       break;
     }
     case 3: {
@@ -57,8 +57,8 @@ struct LocaleSelector {
   }
 
   ~LocaleSelector() {
-    static std::mutex mu;
-    std::lock_guard l(mu);
+    static std::mutex mutex;
+    std::lock_guard guard(mutex);
     std::locale::global(old);
   }
 };

>From 5d17dece518015a02a79ce0b09d06da321c7d0c8 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Thu, 23 Nov 2023 17:19:46 -0500
Subject: [PATCH 09/10] Make mutex guarding calls to global() static

---
 libcxx/benchmarks/stringstream.bench.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libcxx/benchmarks/stringstream.bench.cpp b/libcxx/benchmarks/stringstream.bench.cpp
index 93369e6d6402bba..3cbe5ac5997a3c3 100644
--- a/libcxx/benchmarks/stringstream.bench.cpp
+++ b/libcxx/benchmarks/stringstream.bench.cpp
@@ -26,9 +26,9 @@ double istream_numbers(std::locale* loc) {
 struct LocaleSelector {
   std::locale* imbue;
   std::locale old;
+  static std::mutex mutex;
 
   LocaleSelector(benchmark::State& state) {
-    static std::mutex mutex;
     std::lock_guard guard(mutex);
     switch (state.range(0)) {
     case 0: {
@@ -57,12 +57,13 @@ struct LocaleSelector {
   }
 
   ~LocaleSelector() {
-    static std::mutex mutex;
     std::lock_guard guard(mutex);
     std::locale::global(old);
   }
 };
 
+std::mutex LocaleSelector::mutex;
+
 static void BM_Istream_numbers(benchmark::State& state) {
   LocaleSelector sel(state);
   double i = 0;

>From 47e346c0f3f72095ecc0ceb51217b40a8b5498ef Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Thu, 23 Nov 2023 17:21:53 -0500
Subject: [PATCH 10/10] Address review comments

---
 libcxx/include/__utility/no_destroy.h | 2 +-
 libcxx/include/module.modulemap.in    | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__utility/no_destroy.h b/libcxx/include/__utility/no_destroy.h
index b15469219fca73f..c8581cf9606b571 100644
--- a/libcxx/include/__utility/no_destroy.h
+++ b/libcxx/include/__utility/no_destroy.h
@@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 struct __uninitialized_tag {};
 
-// This class stores an object of type T but never destroys it.
+// This class stores an object of type _Tp but never destroys it.
 //
 // This is akin to using __attribute__((no_destroy)), except that it is possible
 // to control the lifetime of the object with more flexibility by deciding e.g.
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index 17ebe48f329963d..f8d72e89a1e5db3 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -2065,6 +2065,7 @@ module std_private_utility_move                   [system] {
   export std_private_type_traits_is_nothrow_move_constructible
   export std_private_type_traits_remove_reference
 }
+module std_private_utility_no_destroy             [system] { header "__utility/no_destroy.h" }
 module std_private_utility_pair                   [system] {
   header "__utility/pair.h"
   export std_private_ranges_subrange_fwd



More information about the libcxx-commits mailing list