[cfe-dev] [libc++]x=move(x) behavior
Howard Hinnant via cfe-dev
cfe-dev at lists.llvm.org
Thu Oct 1 16:42:42 PDT 2015
On Oct 1, 2015, at 6:28 PM, chen xu via cfe-dev <cfe-dev at lists.llvm.org> wrote:
>
> Hi, my project code (linux, Clang as compiler and LLVM libcxx as c++ library) will have the execution of x=std::move(x) in some code path (not directly, but sometimes it will happen via levels of function calls and parameter-pass by reference, etc.), and it turns out x is cleared after this x = std::move(x). I tried gnu c++ library, and msvc c++ library (on windows), both keep x untouched.
>
> I am wondering if this could be fixed in LLVM libcxx, as it seems making more sense not to clear x when moving x to itself. Thanks. Below is a sample code for repro. Thanks a lot.
>
> #include <iostream>
> using namespace std;
>
> class Test {
> public:
> Test() : x("test") { }
> void MoveX() {
> cout << "before x=move(x), x : " << x;
> x=move(x);
> cout << "after x=move(x), x : " << x;
> }
> private:
> string x;
> };
>
> int main(int argc, char** argv)
> {
> Test test;
> test.MoveX();
> return 0;
> }
>
> The output would be like
>
> before x=move(x), x : test
> after x=move(x), x :
I would like to make the opposite suggestion. I think the libc++ string move assignment operator is too slow instead of too fast.
Currently __move_assign(basic_string& __str, true_type) is coded like this:
template <class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, true_type)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT
#else
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
#endif
{
clear();
shrink_to_fit();
__r_.first() = __str.__r_.first();
__move_assign_alloc(__str);
__str.__zero();
}
I think the author of this code was just being lazy (or maybe was too rushed). Using this test, at -O3:
void
test(std::string& s0, std::string&& s1)
{
s0 = std::move(s1);
}
this compiles down to:
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 10
.globl __Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
.align 4, 0x90
__Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_: ## @_Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
Lfunc_begin0:
.cfi_startproc
.cfi_personality 155, ___gxx_personality_v0
.cfi_lsda 16, Lexception0
## BB#0:
pushq %rbp
Ltmp3:
.cfi_def_cfa_offset 16
Ltmp4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp5:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
Ltmp6:
.cfi_offset %rbx, -32
Ltmp7:
.cfi_offset %r14, -24
movq %rsi, %r14
movq %rdi, %rbx
testb $1, (%rbx)
jne LBB0_1
## BB#2:
movw $0, (%rbx)
jmp LBB0_3
LBB0_1:
movq 16(%rbx), %rax
movb $0, (%rax)
movq $0, 8(%rbx)
LBB0_3: ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE5clearEv.exit.i.i
Ltmp0:
xorl %esi, %esi
movq %rbx, %rdi
callq __ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE7reserveEm
Ltmp1:
## BB#4: ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEaSEOS5_.exit
movq 16(%r14), %rax
movq %rax, 16(%rbx)
movq (%r14), %rax
movq 8(%r14), %rcx
movq %rcx, 8(%rbx)
movq %rax, (%rbx)
movq $0, 16(%r14)
movq $0, 8(%r14)
movq $0, (%r14)
popq %rbx
popq %r14
popq %rbp
retq
LBB0_5:
Ltmp2:
movq %rax, %rdi
callq ___clang_call_terminate
Lfunc_end0:
.cfi_endproc
.section __TEXT,__gcc_except_tab
.align 2
GCC_except_table0:
Lexception0:
.byte 255 ## @LPStart Encoding = omit
.byte 155 ## @TType Encoding = indirect pcrel sdata4
.byte 21 ## @TType base offset
.byte 3 ## Call site Encoding = udata4
.byte 13 ## Call site table length
Lset0 = Ltmp0-Lfunc_begin0 ## >> Call Site 1 <<
.long Lset0
Lset1 = Ltmp1-Ltmp0 ## Call between Ltmp0 and Ltmp1
.long Lset1
Lset2 = Ltmp2-Lfunc_begin0 ## jumps to Ltmp2
.long Lset2
.byte 1 ## On action: 1
.byte 1 ## >> Action Record 1 <<
## Catch TypeInfo 1
.byte 0 ## No further actions
## >> Catch TypeInfos <<
.long 0 ## TypeInfo 1
.align 2
.section __TEXT,__textcoal_nt,coalesced,pure_instructions
.private_extern ___clang_call_terminate
.globl ___clang_call_terminate
.weak_def_can_be_hidden ___clang_call_terminate
.align 4, 0x90
___clang_call_terminate: ## @__clang_call_terminate
## BB#0:
pushq %rbp
movq %rsp, %rbp
callq ___cxa_begin_catch
callq __ZSt9terminatev
.subsections_via_symbols
which is a mess of code much of which *never* gets executed. There’s even an exception handling table complete with call to terminate (awful). This can be vastly improved with just a small change:
index 6ed4e74..0d07cf7 100644
--- a/include/string
+++ b/include/string
@@ -2453,8 +2453,8 @@ basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, tr
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
#endif
{
- clear();
- shrink_to_fit();
+ if (__is_long())
+ __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap());
__r_.first() = __str.__r_.first();
__move_assign_alloc(__str);
__str.__zero();
This is equivalent functionality as what is there today, but compiles down to this instead:
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 10
.globl __Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
.align 4, 0x90
__Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_: ## @_Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
Ltmp3:
.cfi_offset %rbx, -32
Ltmp4:
.cfi_offset %r14, -24
movq %rsi, %rbx
movq %rdi, %r14
testb $1, (%r14)
je LBB0_2
## BB#1:
movq 16(%r14), %rdi
callq __ZdlPv
LBB0_2: ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEaSEOS5_.exit
movq 16(%rbx), %rax
movq %rax, 16(%r14)
movq (%rbx), %rax
movq 8(%rbx), %rcx
movq %rcx, 8(%r14)
movq %rax, (%r14)
movq $0, 16(%rbx)
movq $0, 8(%rbx)
movq $0, (%rbx)
popq %rbx
popq %r14
popq %rbp
retq
.cfi_endproc
.subsections_via_symbols
Not only is this smaller code, it completely eliminates the exception handling table, and completely eliminates a mandatory call into reserve(), which itself is going to have several more branches in it.
Howard
More information about the cfe-dev
mailing list