[libcxx-commits] [libcxx] r368547 - Improve codegen for deque.

Eric Fiselier via libcxx-commits libcxx-commits at lists.llvm.org
Mon Aug 12 00:51:06 PDT 2019


Author: ericwf
Date: Mon Aug 12 00:51:05 2019
New Revision: 368547

URL: http://llvm.org/viewvc/llvm-project?rev=368547&view=rev
Log:
Improve codegen for deque.

This patch rewrites a few loops in deque and split_buffer to better
optimize the codegen. For constructors like
`deque<unsigned char> d(500000, 0);` this patch results in a 2x speedup.

The patch improves the codegen in roughly three ways:

1. Change `do { ... } while (...)` loops into more typical `for` loops.
  The optimizer can reason about normal-looking loops better.

2. Split the iteration over a range into (A) iteration over the blocks,
  then (B) iteration within each block. This nested structure helps LLVM
  lower the inner loop to `memset`.

3. Do fewer things each iteration. Some of these loops were incrementing
  or changing 4-5 variables on every iteration (in addition to performing
  the construction). Previously, most loops would increment the end pointer,
  increment the size, and decrement the count of remaining items to construct.
  Now we only increment a single pointer for most iterations.

Added:
    libcxx/trunk/benchmarks/Utilities.hpp
    libcxx/trunk/benchmarks/deque.bench.cpp
Modified:
    libcxx/trunk/benchmarks/ContainerBenchmarks.hpp
    libcxx/trunk/include/__split_buffer
    libcxx/trunk/include/deque

Modified: libcxx/trunk/benchmarks/ContainerBenchmarks.hpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/benchmarks/ContainerBenchmarks.hpp?rev=368547&r1=368546&r2=368547&view=diff
==============================================================================
--- libcxx/trunk/benchmarks/ContainerBenchmarks.hpp (original)
+++ libcxx/trunk/benchmarks/ContainerBenchmarks.hpp Mon Aug 12 00:51:05 2019
@@ -1,8 +1,18 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
 #ifndef BENCHMARK_CONTAINER_BENCHMARKS_HPP
 #define BENCHMARK_CONTAINER_BENCHMARKS_HPP
 
 #include <cassert>
 
+#include "Utilities.hpp"
 #include "benchmark/benchmark.h"
 
 namespace ContainerBenchmarks {
@@ -12,7 +22,7 @@ void BM_ConstructSize(benchmark::State&
   auto size = st.range(0);
   for (auto _ : st) {
     Container c(size);
-    benchmark::DoNotOptimize(c.data());
+    DoNotOptimizeData(c);
   }
 }
 
@@ -21,7 +31,7 @@ void BM_ConstructSizeValue(benchmark::St
   const auto size = st.range(0);
   for (auto _ : st) {
     Container c(size, val);
-    benchmark::DoNotOptimize(c.data());
+    DoNotOptimizeData(c);
   }
 }
 
@@ -33,7 +43,7 @@ void BM_ConstructIterIter(benchmark::Sta
     benchmark::DoNotOptimize(&in);
     while (st.KeepRunning()) {
         Container c(begin, end);
-        benchmark::DoNotOptimize(c.data());
+        DoNotOptimizeData(c);
     }
 }
 

Added: libcxx/trunk/benchmarks/Utilities.hpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/benchmarks/Utilities.hpp?rev=368547&view=auto
==============================================================================
--- libcxx/trunk/benchmarks/Utilities.hpp (added)
+++ libcxx/trunk/benchmarks/Utilities.hpp Mon Aug 12 00:51:05 2019
@@ -0,0 +1,33 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BENCHMARK_UTILITIES_HPP
+#define BENCHMARK_UTILITIES_HPP
+
+#include <cassert>
+#include <type_traits>
+
+#include "benchmark/benchmark.h"
+
+namespace UtilitiesInternal {
+  template <class Container>
+  auto HaveDataImpl(int) -> decltype((std::declval<Container&>().data(), std::true_type{}));
+  template <class Container>
+  auto HaveDataImpl(long) -> std::false_type;
+  template <class T>
+  using HasData = decltype(HaveDataImpl<T>(0));
+} // namespace UtilitiesInternal
+
+template <class Container, std::enable_if_t<UtilitiesInternal::HasData<Container>::value>* = nullptr>
+void DoNotOptimizeData(Container &c) { benchmark::DoNotOptimize(c.data()); }
+template <class Container, std::enable_if_t<!UtilitiesInternal::HasData<Container>::value>* = nullptr>
+void DoNotOptimizeData(Container &c) { benchmark::DoNotOptimize(&c); }
+
+
+#endif // BENCHMARK_UTILITIES_HPP

Added: libcxx/trunk/benchmarks/deque.bench.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/benchmarks/deque.bench.cpp?rev=368547&view=auto
==============================================================================
--- libcxx/trunk/benchmarks/deque.bench.cpp (added)
+++ libcxx/trunk/benchmarks/deque.bench.cpp Mon Aug 12 00:51:05 2019
@@ -0,0 +1,47 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <deque>
+
+#include "benchmark/benchmark.h"
+
+#include "ContainerBenchmarks.hpp"
+#include "GenerateInput.hpp"
+
+using namespace ContainerBenchmarks;
+
+constexpr std::size_t TestNumInputs = 1024;
+
+BENCHMARK_CAPTURE(BM_ConstructSize,
+    deque_byte,
+    std::deque<unsigned char>{})->Arg(5140480);
+
+BENCHMARK_CAPTURE(BM_ConstructSizeValue,
+    deque_byte,
+    std::deque<unsigned char>{}, 0)->Arg(5140480);
+
+BENCHMARK_CAPTURE(BM_ConstructIterIter,
+  deque_char,
+  std::deque<char>{},
+  getRandomIntegerInputs<char>)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_ConstructIterIter,
+  deque_size_t,
+  std::deque<size_t>{},
+  getRandomIntegerInputs<size_t>)->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(BM_ConstructIterIter,
+  deque_string,
+  std::deque<std::string>{},
+  getRandomStringInputs)->Arg(TestNumInputs);
+
+
+
+
+BENCHMARK_MAIN();

Modified: libcxx/trunk/include/__split_buffer
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/__split_buffer?rev=368547&r1=368546&r2=368547&view=diff
==============================================================================
--- libcxx/trunk/include/__split_buffer (original)
+++ libcxx/trunk/include/__split_buffer Mon Aug 12 00:51:05 2019
@@ -161,6 +161,19 @@ private:
     _LIBCPP_INLINE_VISIBILITY
     void __move_assign_alloc(__split_buffer&, false_type) _NOEXCEPT
         {}
+
+    struct _ConstructTransaction {
+      explicit _ConstructTransaction(pointer* __p, size_type __n) _NOEXCEPT
+      : __pos_(*__p), __end_(*__p + __n), __dest_(__p) {
+      }
+      ~_ConstructTransaction() {
+        *__dest_ = __pos_;
+      }
+      pointer __pos_;
+     const pointer __end_;
+    private:
+     pointer *__dest_;
+    };
 };
 
 template <class _Tp, class _Allocator>
@@ -197,13 +210,10 @@ template <class _Tp, class _Allocator>
 void
 __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n)
 {
-    __alloc_rr& __a = this->__alloc();
-    do
-    {
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
-        ++this->__end_;
-        --__n;
-    } while (__n > 0);
+    _ConstructTransaction __tx(&this->__end_, __n);
+    for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) {
+        __alloc_traits::construct(this->__alloc(), _VSTD::__to_raw_pointer(__tx.__pos_));
+    }
 }
 
 //  Copy constructs __n objects starting at __end_ from __x
@@ -216,13 +226,11 @@ template <class _Tp, class _Allocator>
 void
 __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
 {
-    __alloc_rr& __a = this->__alloc();
-    do
-    {
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
-        ++this->__end_;
-        --__n;
-    } while (__n > 0);
+    _ConstructTransaction __tx(&this->__end_, __n);
+    for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) {
+        __alloc_traits::construct(this->__alloc(),
+            _VSTD::__to_raw_pointer(__tx.__pos_), __x);
+    }
 }
 
 template <class _Tp, class _Allocator>
@@ -262,11 +270,10 @@ typename enable_if
 >::type
 __split_buffer<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last)
 {
-    __alloc_rr& __a = this->__alloc();
-    for (; __first != __last; ++__first)
-    {
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), *__first);
-        ++this->__end_;
+    _ConstructTransaction __tx(&this->__end_, std::distance(__first, __last));
+    for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, ++__first) {
+        __alloc_traits::construct(this->__alloc(),
+            _VSTD::__to_raw_pointer(__tx.__pos_), *__first);
     }
 }
 

Modified: libcxx/trunk/include/deque
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/deque?rev=368547&r1=368546&r2=368547&view=diff
==============================================================================
--- libcxx/trunk/include/deque (original)
+++ libcxx/trunk/include/deque Mon Aug 12 00:51:05 2019
@@ -956,6 +956,74 @@ public:
     typedef __deque_iterator<value_type, const_pointer, const_reference, __map_const_pointer,
                              difference_type>    const_iterator;
 
+    struct __deque_block_range {
+      explicit __deque_block_range(pointer __b, pointer __e) _NOEXCEPT : __begin_(__b), __end_(__e) {}
+      const pointer __begin_;
+      const pointer __end_;
+    };
+
+    struct __deque_range {
+      iterator __pos_;
+      const iterator __end_;
+
+      __deque_range(iterator __pos, iterator __e) _NOEXCEPT
+        : __pos_(__pos), __end_(__e) {}
+
+      explicit operator bool() const _NOEXCEPT {
+        return __pos_ != __end_;
+      }
+
+      __deque_range begin() const {
+        return *this;
+      }
+
+      __deque_range end() const {
+        return __deque_range(__end_, __end_);
+      }
+      __deque_block_range operator*() const _NOEXCEPT {
+         if (__pos_.__m_iter_ == __end_.__m_iter_) {
+          return __deque_block_range(__pos_.__ptr_, __end_.__ptr_);
+        }
+        return __deque_block_range(__pos_.__ptr_, *__pos_.__m_iter_ + __block_size);
+      }
+
+      __deque_range& operator++() _NOEXCEPT {
+        if (__pos_.__m_iter_ == __end_.__m_iter_) {
+          __pos_ = __end_;
+        } else {
+          ++__pos_.__m_iter_;
+          __pos_.__ptr_ = *__pos_.__m_iter_;
+        }
+        return *this;
+      }
+
+
+      friend bool operator==(__deque_range const& __lhs, __deque_range const& __rhs) {
+        return __lhs.__pos_ == __rhs.__pos_;
+      }
+      friend bool operator!=(__deque_range const& __lhs, __deque_range const& __rhs) {
+        return !(__lhs == __rhs);
+      }
+    };
+
+
+
+    struct _ConstructTransaction {
+      _ConstructTransaction(__deque_base* __db, __deque_block_range& __r)
+        : __pos_(__r.__begin_), __end_(__r.__end_), __begin_(__r.__begin_), __base_(__db) {}
+
+
+      ~_ConstructTransaction() {
+        __base_->size() += (__pos_ - __begin_);
+      }
+
+      pointer __pos_;
+      const pointer __end_;
+    private:
+      const pointer __begin_;
+      __deque_base * const __base_;
+    };
+
 protected:
     __map __map_;
     size_type __start_;
@@ -1222,6 +1290,10 @@ public:
     typedef _VSTD::reverse_iterator<iterator>       reverse_iterator;
     typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
 
+    using typename __base::__deque_range;
+    using typename __base::__deque_block_range;
+    using typename __base::_ConstructTransaction;
+
     // construct/copy/destroy:
     _LIBCPP_INLINE_VISIBILITY
     deque()
@@ -2299,8 +2371,12 @@ deque<_Tp, _Allocator>::__append(_ForIte
     if (__n > __back_capacity)
         __add_back_capacity(__n - __back_capacity);
     // __n <= __back_capacity
-    for (iterator __i = __base::end(); __f != __l; ++__i, (void) ++__f, ++__base::size())
-        __alloc_traits::construct(__a, _VSTD::addressof(*__i), *__f);
+    for (__deque_block_range __br : __deque_range(__base::end(), __base::end() + __n)) {
+      _ConstructTransaction __tx(this, __br);
+      for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, (void)++__f) {
+        __alloc_traits::construct(__a, std::__to_raw_pointer(__tx.__pos_), *__f);
+      }
+    }
 }
 
 template <class _Tp, class _Allocator>
@@ -2312,8 +2388,12 @@ deque<_Tp, _Allocator>::__append(size_ty
     if (__n > __back_capacity)
         __add_back_capacity(__n - __back_capacity);
     // __n <= __back_capacity
-    for (iterator __i = __base::end(); __n; --__n, ++__i, ++__base::size())
-        __alloc_traits::construct(__a, _VSTD::addressof(*__i));
+    for (__deque_block_range __br : __deque_range(__base::end(), __base::end() + __n)) {
+      _ConstructTransaction __tx(this, __br);
+      for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) {
+        __alloc_traits::construct(__a, std::__to_raw_pointer(__tx.__pos_));
+      }
+    }
 }
 
 template <class _Tp, class _Allocator>
@@ -2325,8 +2405,13 @@ deque<_Tp, _Allocator>::__append(size_ty
     if (__n > __back_capacity)
         __add_back_capacity(__n - __back_capacity);
     // __n <= __back_capacity
-    for (iterator __i = __base::end(); __n; --__n, ++__i, ++__base::size())
-        __alloc_traits::construct(__a, _VSTD::addressof(*__i), __v);
+    for (__deque_block_range __br : __deque_range(__base::end(), __base::end() + __n)) {
+      _ConstructTransaction __tx(this, __br);
+      for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) {
+        __alloc_traits::construct(__a, std::__to_raw_pointer(__tx.__pos_), __v);
+      }
+    }
+
 }
 
 // Create front capacity for one block of elements.




More information about the libcxx-commits mailing list