[PATCH] D44823: [libcxx] Improving std::vector<char> and std::deque<char> performance
Danila Kutenin via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 23 03:50:04 PDT 2018
danlark created this revision.
danlark added reviewers: EricWF, mclow.lists.
Herald added subscribers: cfe-commits, christof.
Consider the following code.
#include <memory>
#include <vector>
// Minimal reproducer type: its only state is a byte buffer.
// Constructing it asks std::vector<char> for `size` value-initialized chars,
// which is the code path (__construct_at_end) this patch speeds up.
class TestClass {
    std::vector<char> Data;  // private by default in a class

public:
    // Parentheses (not braces) are deliberate: they select the count constructor.
    TestClass(size_t size) : Data(size) {}
};
// Driver: builds a 64 KiB TestClass 100000 times. Each reset() destroys the
// previously held object before taking ownership of the new one, so the
// run time is dominated by construction of the vector<char> buffer.
int main(void) {
    std::unique_ptr<TestClass> test;
    int remaining = 100000;
    while (remaining > 0) {
        test.reset(new TestClass(0x10000));
        --remaining;
    }
    return 0;
}
With clang 5.0.1 this takes 14 seconds to run on my laptop. If you replace `char` with `short`, it becomes 35 times faster (wow). The main difference in the generated code is that for `char` no `memset` is called inside the `__construct_at_end` function.
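By manipulating a local variable in the loop instead of `this->__end_`, this patch lets the element-by-element loop be fully optimized away (it is replaced by a single `memset` call).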
Prior to this change, this would be generated (on x86-64):
51,79c58,66
< movq %rax, 8(%rbx)
< movq %rax, (%rbx)
< movq %rax, %rcx
< addq $65536, %rcx # imm = 0x10000
< movq %rcx, 16(%rbx)
< movq $-65536, %rcx # imm = 0xFFFFFFFFFFFF0000
< .align 16, 0x90
< .LBB0_4: # Parent Loop BB0_1 Depth=1
< # => This Inner Loop Header: Depth=2
< movb $0, (%rax)
< movq 8(%rbx), %rax
< leaq 1(%rax), %rdx
< movq %rdx, 8(%rbx)
< movb $0, 1(%rax)
< movq 8(%rbx), %rax
< leaq 1(%rax), %rdx
< movq %rdx, 8(%rbx)
< movb $0, 1(%rax)
< movq 8(%rbx), %rax
< leaq 1(%rax), %rdx
< movq %rdx, 8(%rbx)
< movb $0, 1(%rax)
< movq 8(%rbx), %rax
< incq %rax
< movq %rax, 8(%rbx)
< addq $4, %rcx
< jne .LBB0_4
< # BB#5: # %_ZN9TestClassC2Em.exit
< # in Loop: Header=BB0_1 Depth=1
---
> movq %rax, (%r12)
> movq %rax, %rbx
> addq $65536, %rbx # imm = 0x10000
> movq %rbx, 16(%r12)
> xorl %esi, %esi
> movl $65536, %edx # imm = 0x10000
> movq %rax, %rdi
> callq memset
> movq %rbx, 8(%r12)
81,82c68,69
Repository:
rCXX libc++
https://reviews.llvm.org/D44823
Files:
libcxx/trunk/include/__split_buffer
libcxx/trunk/include/vector
Index: libcxx/trunk/include/vector
===================================================================
--- libcxx/trunk/include/vector
+++ libcxx/trunk/include/vector
@@ -984,11 +984,13 @@
vector<_Tp, _Allocator>::__construct_at_end(size_type __n)
{
allocator_type& __a = this->__alloc();
+ pointer __to_be_end = this->__end_;
do
{
__RAII_IncreaseAnnotator __annotator(*this);
- __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
- ++this->__end_;
+ __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end));
+ ++__to_be_end;
+ this->__end_ = __to_be_end;
--__n;
__annotator.__done();
} while (__n > 0);
@@ -1006,11 +1008,13 @@
vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
{
allocator_type& __a = this->__alloc();
+ pointer __to_be_end = this->__end_;
do
{
__RAII_IncreaseAnnotator __annotator(*this);
- __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
- ++this->__end_;
+ __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end), __x);
+ ++__to_be_end;
+ this->__end_ = __to_be_end;
--__n;
__annotator.__done();
} while (__n > 0);
Index: libcxx/trunk/include/__split_buffer
===================================================================
--- libcxx/trunk/include/__split_buffer
+++ libcxx/trunk/include/__split_buffer
@@ -198,10 +198,12 @@
__split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n)
{
__alloc_rr& __a = this->__alloc();
+ pointer __to_be_end = this->__end_;
do
{
- __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
- ++this->__end_;
+ __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end));
+ ++__to_be_end;
+ this->__end_ = __to_be_end;
--__n;
} while (__n > 0);
}
@@ -217,10 +219,12 @@
__split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
{
__alloc_rr& __a = this->__alloc();
+ pointer __to_be_end = this->__end_;
do
{
- __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
- ++this->__end_;
+ __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end), __x);
+ ++__to_be_end;
+ this->__end_ = __to_be_end;
--__n;
} while (__n > 0);
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D44823.139570.patch
Type: text/x-patch
Size: 2486 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20180323/3124263a/attachment.bin>
More information about the cfe-commits
mailing list