[PATCH] D44823: [libcxx] Improving std::vector<char> and std::deque<char> performance

Danila Kutenin via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 23 03:50:04 PDT 2018


danlark created this revision.
danlark added reviewers: EricWF, mclow.lists.
Herald added subscribers: cfe-commits, christof.

Consider the following code.

  #include <memory>
  #include <vector>
  
  class TestClass {
  public:
      TestClass(size_t size)
          : Data(size)
      {
      }
  private:
      std::vector<char> Data;
  };
  
  int main(void) {
      std::unique_ptr<TestClass> test;
      for (int i = 0; i < 100000; ++i)
          test.reset(new TestClass(0x10000));
      return 0;
  }

With clang 5.0.1 it runs for 14 seconds on my laptop. If you replace `char` with `short`, it becomes 35 times faster (wow). The main difference in the generated code is that for `char` no `memset` is called inside the `__construct_at_end` function.

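This patch advances a local copy of the end pointer inside the construction loop and writes it back to `this->__end_`, instead of constructing through and incrementing `this->__end_` directly. Presumably, because a store through a `char` pointer may alias any object, the compiler previously had to reload `this->__end_` after every element store; with the local variable, the loop can be fully optimized away and replaced by a single `memset` call.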

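The following diff of the generated assembly (on x86-64) shows the effect: lines marked `<` were generated prior to this change, and lines marked `>` are generated with it: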

  51,79c58,66
  <   movq  %rax, 8(%rbx)
  <   movq  %rax, (%rbx)
  <   movq  %rax, %rcx
  <   addq  $65536, %rcx            # imm = 0x10000
  <   movq  %rcx, 16(%rbx)
  <   movq  $-65536, %rcx           # imm = 0xFFFFFFFFFFFF0000
  <   .align  16, 0x90
  < .LBB0_4:                                #   Parent Loop BB0_1 Depth=1
  <                                         # =>  This Inner Loop Header: Depth=2
  <   movb  $0, (%rax)
  <   movq  8(%rbx), %rax
  <   leaq  1(%rax), %rdx
  <   movq  %rdx, 8(%rbx)
  <   movb  $0, 1(%rax)
  <   movq  8(%rbx), %rax
  <   leaq  1(%rax), %rdx
  <   movq  %rdx, 8(%rbx)
  <   movb  $0, 1(%rax)
  <   movq  8(%rbx), %rax
  <   leaq  1(%rax), %rdx
  <   movq  %rdx, 8(%rbx)
  <   movb  $0, 1(%rax)
  <   movq  8(%rbx), %rax
  <   incq  %rax
  <   movq  %rax, 8(%rbx)
  <   addq  $4, %rcx
  <   jne  .LBB0_4
  < # BB#5:                                 # %_ZN9TestClassC2Em.exit
  <                                         #   in Loop: Header=BB0_1 Depth=1
  ---
  >   movq  %rax, (%r12)
  >   movq  %rax, %rbx
  >   addq  $65536, %rbx            # imm = 0x10000
  >   movq  %rbx, 16(%r12)
  >   xorl  %esi, %esi
  >   movl  $65536, %edx            # imm = 0x10000
  >   movq  %rax, %rdi
  >   callq  memset
  >   movq  %rbx, 8(%r12)
  81,82c68,69


Repository:
  rCXX libc++

https://reviews.llvm.org/D44823

Files:
  libcxx/trunk/include/__split_buffer
  libcxx/trunk/include/vector


Index: libcxx/trunk/include/vector
===================================================================
--- libcxx/trunk/include/vector
+++ libcxx/trunk/include/vector
@@ -984,11 +984,13 @@
 vector<_Tp, _Allocator>::__construct_at_end(size_type __n)
 {
     allocator_type& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
         __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end));
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
         __annotator.__done();
     } while (__n > 0);
@@ -1006,11 +1008,13 @@
 vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
 {
     allocator_type& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
         __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end), __x);
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
         __annotator.__done();
     } while (__n > 0);
Index: libcxx/trunk/include/__split_buffer
===================================================================
--- libcxx/trunk/include/__split_buffer
+++ libcxx/trunk/include/__split_buffer
@@ -198,10 +198,12 @@
 __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n)
 {
     __alloc_rr& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end));
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
     } while (__n > 0);
 }
@@ -217,10 +219,12 @@
 __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
 {
     __alloc_rr& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end), __x);
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
     } while (__n > 0);
 }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D44823.139570.patch
Type: text/x-patch
Size: 2486 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20180323/3124263a/attachment.bin>


More information about the cfe-commits mailing list