[libcxx-commits] [libcxx] r367183 - Fix PR35637: suboptimal codegen for `vector<unsigned char>`.

Eric Fiselier via libcxx-commits libcxx-commits at lists.llvm.org
Sat Jul 27 21:37:02 PDT 2019


Author: ericwf
Date: Sat Jul 27 21:37:02 2019
New Revision: 367183

URL: http://llvm.org/viewvc/llvm-project?rev=367183&view=rev
Log:
Fix PR35637: suboptimal codegen for `vector<unsigned char>`.

The optimizer is petulant and temperamental. In this case LLVM failed to lower
the the "insert at end" loop used by`vector<unsigned char>` to a `memset` despite
`memset` being substantially faster over a range of bytes.

LLVM has the ability to lower loops to `memset` whet appropriate, but the
odd nature of libc++'s loops prevented the optimization from taking places.

This patch addresses the issue by rewriting the loops from the form
`do [ ... --__n; } while (__n > 0);` to instead use a for loop over a pointer
range (For example: `for (auto *__i = ...; __i < __e; ++__i)`).

This patch also rewrites the asan annotations to unposion all additional memory
at the start of the loop instead of once per iterations. This could potentially
permit false negatives where the constructor of element N attempts to access
element N + 1 during its construction.

The before and after results for the `BM_ConstructSize/vector_byte/5140480_mean`
benchmark (run 5 times) are:

--------------------------------------------------------------------------------------------
Benchmark                                                 Time             CPU   Iterations
--------------------------------------------------------------------------------------------
Before
------
BM_ConstructSize/vector_byte/5140480_mean          12530140 ns     12469693 ns            N/A
BM_ConstructSize/vector_byte/5140480_median        12512818 ns     12445571 ns            N/A
BM_ConstructSize/vector_byte/5140480_stddev          106224 ns       107907 ns            5
-----
After
-----
BM_ConstructSize/vector_byte/5140480_mean            167285 ns       166500 ns            N/A
BM_ConstructSize/vector_byte/5140480_median          166749 ns       166069 ns            N/A
BM_ConstructSize/vector_byte/5140480_stddev            3242 ns         3184 ns            5

Modified:
    libcxx/trunk/benchmarks/ContainerBenchmarks.hpp
    libcxx/trunk/benchmarks/vector_operations.bench.cpp
    libcxx/trunk/include/vector

Modified: libcxx/trunk/benchmarks/ContainerBenchmarks.hpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/benchmarks/ContainerBenchmarks.hpp?rev=367183&r1=367182&r2=367183&view=diff
==============================================================================
--- libcxx/trunk/benchmarks/ContainerBenchmarks.hpp (original)
+++ libcxx/trunk/benchmarks/ContainerBenchmarks.hpp Sat Jul 27 21:37:02 2019
@@ -7,6 +7,23 @@
 
 namespace ContainerBenchmarks {
 
+template <class Container>
+void BM_ConstructSize(benchmark::State& st, Container) {
+  auto size = st.range(0);
+  for (auto _ : st) {
+    Container c(size);
+    benchmark::DoNotOptimize(c.data());
+  }
+}
+
+template <class Container>
+void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container::value_type const& val) {
+  const auto size = st.range(0);
+  for (auto _ : st) {
+    Container c(size, val);
+    benchmark::DoNotOptimize(c.data());
+  }
+}
 
 template <class Container, class GenInputs>
 void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {

Modified: libcxx/trunk/benchmarks/vector_operations.bench.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/benchmarks/vector_operations.bench.cpp?rev=367183&r1=367182&r2=367183&view=diff
==============================================================================
--- libcxx/trunk/benchmarks/vector_operations.bench.cpp (original)
+++ libcxx/trunk/benchmarks/vector_operations.bench.cpp Sat Jul 27 21:37:02 2019
@@ -13,6 +13,14 @@ using namespace ContainerBenchmarks;
 
 constexpr std::size_t TestNumInputs = 1024;
 
+BENCHMARK_CAPTURE(BM_ConstructSize,
+    vector_byte,
+    std::vector<unsigned char>{})->Arg(5140480);
+
+BENCHMARK_CAPTURE(BM_ConstructSizeValue,
+    vector_byte,
+    std::vector<unsigned char>{}, 0)->Arg(5140480);
+
 BENCHMARK_CAPTURE(BM_ConstructIterIter,
   vector_char,
   std::vector<char>{},

Modified: libcxx/trunk/include/vector
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/vector?rev=367183&r1=367182&r2=367183&view=diff
==============================================================================
--- libcxx/trunk/include/vector (original)
+++ libcxx/trunk/include/vector Sat Jul 27 21:37:02 2019
@@ -864,58 +864,67 @@ private:
 #else
     _LIBCPP_INLINE_VISIBILITY
     void __annotate_contiguous_container(const void*, const void*, const void*,
-                                         const void*) const {}
+                                         const void*) const _NOEXCEPT {}
 #endif
     _LIBCPP_INLINE_VISIBILITY
-    void __annotate_new(size_type __current_size) const {
+    void __annotate_new(size_type __current_size) const _NOEXCEPT {
       __annotate_contiguous_container(data(), data() + capacity(),
                                       data() + capacity(), data() + __current_size);
     }
 
     _LIBCPP_INLINE_VISIBILITY
-    void __annotate_delete() const {
+    void __annotate_delete() const _NOEXCEPT {
       __annotate_contiguous_container(data(), data() + capacity(),
                                       data() + size(), data() + capacity());
     }
 
     _LIBCPP_INLINE_VISIBILITY
-    void __annotate_increase(size_type __n) const
+    void __annotate_increase(size_type __n) const _NOEXCEPT
     {
       __annotate_contiguous_container(data(), data() + capacity(),
                                       data() + size(), data() + size() + __n);
     }
 
     _LIBCPP_INLINE_VISIBILITY
-    void __annotate_shrink(size_type __old_size) const
+    void __annotate_shrink(size_type __old_size) const _NOEXCEPT
     {
       __annotate_contiguous_container(data(), data() + capacity(),
                                       data() + __old_size, data() + size());
     }
+
+  struct _ConstructTransaction {
+    explicit _ConstructTransaction(vector &__v, size_type __n)
+      : __v_(__v),  __pos_(__v.__end_), __new_end_(__v.__end_ + __n) {
 #ifndef _LIBCPP_HAS_NO_ASAN
-    // The annotation for size increase should happen before the actual increase,
-    // but if an exception is thrown after that the annotation has to be undone.
-    struct __RAII_IncreaseAnnotator {
-      __RAII_IncreaseAnnotator(const vector &__v, size_type __n = 1)
-        : __commit(false), __v(__v), __old_size(__v.size() + __n) {
-        __v.__annotate_increase(__n);
-      }
-      void __done() { __commit = true; }
-      ~__RAII_IncreaseAnnotator() {
-        if (__commit) return;
-        __v.__annotate_shrink(__old_size);
+      __v_.__annotate_increase(__n);
+#endif
+    }
+    ~_ConstructTransaction() {
+      __v_.__end_ = __pos_;
+#ifndef _LIBCPP_HAS_NO_ASAN
+      if (__pos_ != __new_end_) {
+        __v_.__annotate_shrink(__new_end_ - __v_.__begin_);
       }
-      bool __commit;
-      const vector &__v;
-      size_type __old_size;
-    };
-#else
-    struct __RAII_IncreaseAnnotator {
-      _LIBCPP_INLINE_VISIBILITY
-      __RAII_IncreaseAnnotator(const vector &, size_type = 1) {}
-      _LIBCPP_INLINE_VISIBILITY void __done() {}
-    };
 #endif
+    }
 
+    vector &__v_;
+    pointer __pos_;
+    const_pointer const __new_end_;
+
+  private:
+    _ConstructTransaction(_ConstructTransaction const&) = delete;
+    _ConstructTransaction& operator=(_ConstructTransaction const&) = delete;
+  };
+
+  template <class ..._Args>
+  _LIBCPP_INLINE_VISIBILITY
+  void __construct_one_at_end(_Args&& ...__args) {
+    _ConstructTransaction __tx(*this, 1);
+    __alloc_traits::construct(this->__alloc(), _VSTD::__to_raw_pointer(__tx.__pos_),
+        _VSTD::forward<_Args>(__args)...);
+    ++__tx.__pos_;
+  }
 };
 
 #ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
@@ -1027,15 +1036,10 @@ template <class _Tp, class _Allocator>
 void
 vector<_Tp, _Allocator>::__construct_at_end(size_type __n)
 {
-    allocator_type& __a = this->__alloc();
-    do
-    {
-        __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
-        ++this->__end_;
-        --__n;
-        __annotator.__done();
-    } while (__n > 0);
+    _ConstructTransaction __tx(*this, __n);
+    for (; __tx.__pos_ != __tx.__new_end_; ++__tx.__pos_) {
+      __alloc_traits::construct(this->__alloc(), _VSTD::__to_raw_pointer(__tx.__pos_));
+    }
 }
 
 //  Copy constructs __n objects starting at __end_ from __x
@@ -1049,15 +1053,10 @@ inline
 void
 vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
 {
-    allocator_type& __a = this->__alloc();
-    do
-    {
-        __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
-        ++this->__end_;
-        --__n;
-        __annotator.__done();
-    } while (__n > 0);
+    _ConstructTransaction __tx(*this, __n);
+    for (; __tx.__pos_ != __tx.__new_end_; ++__tx.__pos_) {
+        __alloc_traits::construct(this->__alloc(), _VSTD::__to_raw_pointer(__tx.__pos_), __x);
+    }
 }
 
 template <class _Tp, class _Allocator>
@@ -1069,10 +1068,8 @@ typename enable_if
 >::type
 vector<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last, size_type __n)
 {
-    allocator_type& __a = this->__alloc();
-    __RAII_IncreaseAnnotator __annotator(*this, __n);
-    __alloc_traits::__construct_range_forward(__a, __first, __last, this->__end_);
-    __annotator.__done();
+    _ConstructTransaction __tx(*this, __n);
+    __alloc_traits::__construct_range_forward(this->__alloc(), __first, __last, __tx.__pos_);
 }
 
 //  Default constructs __n objects starting at __end_
@@ -1633,11 +1630,7 @@ vector<_Tp, _Allocator>::push_back(const
 {
     if (this->__end_ != this->__end_cap())
     {
-        __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(this->__alloc(),
-                                  _VSTD::__to_raw_pointer(this->__end_), __x);
-        __annotator.__done();
-        ++this->__end_;
+        __construct_one_at_end(__x);
     }
     else
         __push_back_slow_path(__x);
@@ -1652,12 +1645,7 @@ vector<_Tp, _Allocator>::push_back(value
 {
     if (this->__end_ < this->__end_cap())
     {
-        __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(this->__alloc(),
-                                  _VSTD::__to_raw_pointer(this->__end_),
-                                  _VSTD::move(__x));
-        __annotator.__done();
-        ++this->__end_;
+        __construct_one_at_end(_VSTD::move(__x));
     }
     else
         __push_back_slow_path(_VSTD::move(__x));
@@ -1688,12 +1676,7 @@ vector<_Tp, _Allocator>::emplace_back(_A
 {
     if (this->__end_ < this->__end_cap())
     {
-        __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(this->__alloc(),
-                                  _VSTD::__to_raw_pointer(this->__end_),
-                                  _VSTD::forward<_Args>(__args)...);
-        __annotator.__done();
-        ++this->__end_;
+        __construct_one_at_end(_VSTD::forward<_Args>(__args)...);
     }
     else
         __emplace_back_slow_path(_VSTD::forward<_Args>(__args)...);
@@ -1761,10 +1744,15 @@ vector<_Tp, _Allocator>::__move_range(po
 {
     pointer __old_last = this->__end_;
     difference_type __n = __old_last - __to;
-    for (pointer __i = __from_s + __n; __i < __from_e; ++__i, ++this->__end_)
-        __alloc_traits::construct(this->__alloc(),
-                                  _VSTD::__to_raw_pointer(this->__end_),
-                                  _VSTD::move(*__i));
+    {
+      pointer __i = __from_s + __n;
+      _ConstructTransaction __tx(*this, __from_e - __i);
+      for (; __i < __from_e; ++__i, ++__tx.__pos_) {
+          __alloc_traits::construct(this->__alloc(),
+                                    _VSTD::__to_raw_pointer(__tx.__pos_),
+                                    _VSTD::move(*__i));
+      }
+    }
     _VSTD::move_backward(__from_s, __from_s + __n, __old_last);
 }
 
@@ -1780,12 +1768,9 @@ vector<_Tp, _Allocator>::insert(const_it
     pointer __p = this->__begin_ + (__position - begin());
     if (this->__end_ < this->__end_cap())
     {
-        __RAII_IncreaseAnnotator __annotator(*this);
         if (__p == this->__end_)
         {
-            __alloc_traits::construct(this->__alloc(),
-                                      _VSTD::__to_raw_pointer(this->__end_), __x);
-            ++this->__end_;
+            __construct_one_at_end(__x);
         }
         else
         {
@@ -1795,7 +1780,6 @@ vector<_Tp, _Allocator>::insert(const_it
                 ++__xr;
             *__p = *__xr;
         }
-        __annotator.__done();
     }
     else
     {
@@ -1821,20 +1805,15 @@ vector<_Tp, _Allocator>::insert(const_it
     pointer __p = this->__begin_ + (__position - begin());
     if (this->__end_ < this->__end_cap())
     {
-        __RAII_IncreaseAnnotator __annotator(*this);
         if (__p == this->__end_)
         {
-            __alloc_traits::construct(this->__alloc(),
-                                      _VSTD::__to_raw_pointer(this->__end_),
-                                      _VSTD::move(__x));
-            ++this->__end_;
+            __construct_one_at_end(_VSTD::move(__x));
         }
         else
         {
             __move_range(__p, this->__end_, __p + 1);
             *__p = _VSTD::move(__x);
         }
-        __annotator.__done();
     }
     else
     {
@@ -1859,13 +1838,9 @@ vector<_Tp, _Allocator>::emplace(const_i
     pointer __p = this->__begin_ + (__position - begin());
     if (this->__end_ < this->__end_cap())
     {
-        __RAII_IncreaseAnnotator __annotator(*this);
         if (__p == this->__end_)
         {
-            __alloc_traits::construct(this->__alloc(),
-                                      _VSTD::__to_raw_pointer(this->__end_),
-                                      _VSTD::forward<_Args>(__args)...);
-            ++this->__end_;
+            __construct_one_at_end(_VSTD::forward<_Args>(__args)...);
         }
         else
         {
@@ -1873,7 +1848,6 @@ vector<_Tp, _Allocator>::emplace(const_i
             __move_range(__p, this->__end_, __p + 1);
             *__p = _VSTD::move(__tmp.get());
         }
-        __annotator.__done();
     }
     else
     {
@@ -1911,9 +1885,7 @@ vector<_Tp, _Allocator>::insert(const_it
             }
             if (__n > 0)
             {
-                __RAII_IncreaseAnnotator __annotator(*this, __n);
                 __move_range(__p, __old_last, __p + __old_n);
-                __annotator.__done();
                 const_pointer __xr = pointer_traits<const_pointer>::pointer_to(__x);
                 if (__p <= __xr && __xr < this->__end_)
                     __xr += __old_n;
@@ -1955,11 +1927,7 @@ vector<_Tp, _Allocator>::insert(const_it
     pointer __old_last = this->__end_;
     for (; this->__end_ != this->__end_cap() && __first != __last; ++__first)
     {
-        __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_),
-                                  *__first);
-        ++this->__end_;
-        __annotator.__done();
+        __construct_one_at_end(*__first);
     }
     __split_buffer<value_type, allocator_type&> __v(__a);
     if (__first != __last)
@@ -2026,9 +1994,7 @@ vector<_Tp, _Allocator>::insert(const_it
             }
             if (__n > 0)
             {
-                __RAII_IncreaseAnnotator __annotator(*this, __n);
                 __move_range(__p, __old_last, __p + __old_n);
-                __annotator.__done();
                 _VSTD::copy(__first, __m, __p);
             }
         }




More information about the libcxx-commits mailing list