[cfe-dev] [libc++]x=move(x) behavior

Eric Fiselier via cfe-dev cfe-dev at lists.llvm.org
Thu Oct 1 22:11:58 PDT 2015


Hi Chen,

What Howard forgot to mention is that it is that self move-assignment
on standard library types is undefined behavior.
In the past there was an exception requiring std::string self move
assignment to be a nop but that requirement has been removed.

Please see https://llvm.org/bugs/show_bug.cgi?id=24453 for more
information about this issue.

On Thu, Oct 1, 2015 at 5:42 PM, Howard Hinnant via cfe-dev
<cfe-dev at lists.llvm.org> wrote:
> On Oct 1, 2015, at 6:28 PM, chen xu via cfe-dev <cfe-dev at lists.llvm.org> wrote:
>>
>> Hi, my project code (linux, Clang as compiler and LLVM libcxx as c++ library) will have the execution of x=std::move(x) in some code path (not directly, but sometimes it will happen via levels of function calls and parameter-pass by reference, etc.), and it turns out x is cleared after this x = std::move(x). I tried gnu c++ library, and msvc c++ library (on windows), both keep x untouched.
>>
>> I am wondering if this could be fixed in LLVM libcxx, as it seems making more sense not to clear x when moving x to itself. Thanks. Below is a sample code for repro.  Thanks a lot.
>>
>> #include <iostream>
>> using namespace std;
>>
>> class Test {
>> public:
>>     Test() : x("test")  { }
>>     void MoveX() {
>>        cout << "before x=move(x), x : " << x;
>>        x=move(x);
>>        cout << "after x=move(x), x : " << x;
>>     }
>> private:
>>     string x;
>> };
>>
>> int main(int argc, char** argv)
>> {
>>     Test test;
>>     test.MoveX();
>>     return 0;
>> }
>>
>> The output would be like
>>
>> before x=move(x), x : test
>> after x=move(x), x :
>
> I would like to make the opposite suggestion.  I think the libc++ string move assignment operator is too slow instead of too fast.
>
> Currently __move_assign(basic_string& __str, true_type) is coded like this:
>
>     template <class _CharT, class _Traits, class _Allocator>
>     inline _LIBCPP_INLINE_VISIBILITY
>     void
>     basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, true_type)
>     #if _LIBCPP_STD_VER > 14
>         _NOEXCEPT
>     #else
>         _NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
>     #endif
>     {
>         clear();
>         shrink_to_fit();
>         __r_.first() = __str.__r_.first();
>         __move_assign_alloc(__str);
>         __str.__zero();
>     }
>
> I think the author of this code was just being lazy (or maybe was too rushed).  Using this test, at -O3:
>
>     void
>     test(std::string& s0, std::string&& s1)
>     {
>         s0 = std::move(s1);
>     }
>
> this compiles down to:
>
>         .section        __TEXT,__text,regular,pure_instructions
>         .macosx_version_min 10, 10
>         .globl  __Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
>         .align  4, 0x90
>     __Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_: ## @_Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
>     Lfunc_begin0:
>         .cfi_startproc
>         .cfi_personality 155, ___gxx_personality_v0
>         .cfi_lsda 16, Lexception0
>     ## BB#0:
>         pushq   %rbp
>     Ltmp3:
>         .cfi_def_cfa_offset 16
>     Ltmp4:
>         .cfi_offset %rbp, -16
>         movq    %rsp, %rbp
>     Ltmp5:
>         .cfi_def_cfa_register %rbp
>         pushq   %r14
>         pushq   %rbx
>     Ltmp6:
>         .cfi_offset %rbx, -32
>     Ltmp7:
>         .cfi_offset %r14, -24
>         movq    %rsi, %r14
>         movq    %rdi, %rbx
>         testb   $1, (%rbx)
>         jne     LBB0_1
>     ## BB#2:
>         movw    $0, (%rbx)
>         jmp     LBB0_3
>     LBB0_1:
>         movq    16(%rbx), %rax
>         movb    $0, (%rax)
>         movq    $0, 8(%rbx)
>     LBB0_3:                                 ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE5clearEv.exit.i.i
>     Ltmp0:
>         xorl    %esi, %esi
>         movq    %rbx, %rdi
>         callq   __ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE7reserveEm
>     Ltmp1:
>     ## BB#4:                                ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEaSEOS5_.exit
>         movq    16(%r14), %rax
>         movq    %rax, 16(%rbx)
>         movq    (%r14), %rax
>         movq    8(%r14), %rcx
>         movq    %rcx, 8(%rbx)
>         movq    %rax, (%rbx)
>         movq    $0, 16(%r14)
>         movq    $0, 8(%r14)
>         movq    $0, (%r14)
>         popq    %rbx
>         popq    %r14
>         popq    %rbp
>         retq
>     LBB0_5:
>     Ltmp2:
>         movq    %rax, %rdi
>         callq   ___clang_call_terminate
>     Lfunc_end0:
>         .cfi_endproc
>         .section        __TEXT,__gcc_except_tab
>         .align  2
>     GCC_except_table0:
>     Lexception0:
>         .byte   255                     ## @LPStart Encoding = omit
>         .byte   155                     ## @TType Encoding = indirect pcrel sdata4
>         .byte   21                      ## @TType base offset
>         .byte   3                       ## Call site Encoding = udata4
>         .byte   13                      ## Call site table length
>     Lset0 = Ltmp0-Lfunc_begin0              ## >> Call Site 1 <<
>         .long   Lset0
>     Lset1 = Ltmp1-Ltmp0                     ##   Call between Ltmp0 and Ltmp1
>         .long   Lset1
>     Lset2 = Ltmp2-Lfunc_begin0              ##     jumps to Ltmp2
>         .long   Lset2
>         .byte   1                       ##   On action: 1
>         .byte   1                       ## >> Action Record 1 <<
>                                             ##   Catch TypeInfo 1
>         .byte   0                       ##   No further actions
>                                             ## >> Catch TypeInfos <<
>         .long   0                       ## TypeInfo 1
>         .align  2
>
>         .section        __TEXT,__textcoal_nt,coalesced,pure_instructions
>         .private_extern ___clang_call_terminate
>         .globl  ___clang_call_terminate
>         .weak_def_can_be_hidden ___clang_call_terminate
>         .align  4, 0x90
>     ___clang_call_terminate:                ## @__clang_call_terminate
>     ## BB#0:
>         pushq   %rbp
>         movq    %rsp, %rbp
>         callq   ___cxa_begin_catch
>         callq   __ZSt9terminatev
>
>
>     .subsections_via_symbols
>
> which is a mess of code much of which *never* gets executed.  There’s even an exception handling table complete with call to terminate (awful).  This can be vastly improved with just a small change:
>
>     index 6ed4e74..0d07cf7 100644
>     --- a/include/string
>     +++ b/include/string
>     @@ -2453,8 +2453,8 @@ basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, tr
>          _NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
>      #endif
>      {
>     -    clear();
>     -    shrink_to_fit();
>     +    if (__is_long())
>     +        __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap());
>          __r_.first() = __str.__r_.first();
>          __move_assign_alloc(__str);
>          __str.__zero();
>
> This is equivalent functionality as what is there today, but compiles down to this instead:
>
>         .section        __TEXT,__text,regular,pure_instructions
>         .macosx_version_min 10, 10
>         .globl  __Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
>         .align  4, 0x90
>     __Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_: ## @_Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
>         .cfi_startproc
>     ## BB#0:
>         pushq   %rbp
>     Ltmp0:
>         .cfi_def_cfa_offset 16
>     Ltmp1:
>         .cfi_offset %rbp, -16
>         movq    %rsp, %rbp
>     Ltmp2:
>         .cfi_def_cfa_register %rbp
>         pushq   %r14
>         pushq   %rbx
>     Ltmp3:
>         .cfi_offset %rbx, -32
>     Ltmp4:
>         .cfi_offset %r14, -24
>         movq    %rsi, %rbx
>         movq    %rdi, %r14
>         testb   $1, (%r14)
>         je      LBB0_2
>     ## BB#1:
>         movq    16(%r14), %rdi
>         callq   __ZdlPv
>     LBB0_2:                                 ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEaSEOS5_.exit
>         movq    16(%rbx), %rax
>         movq    %rax, 16(%r14)
>         movq    (%rbx), %rax
>         movq    8(%rbx), %rcx
>         movq    %rcx, 8(%r14)
>         movq    %rax, (%r14)
>         movq    $0, 16(%rbx)
>         movq    $0, 8(%rbx)
>         movq    $0, (%rbx)
>         popq    %rbx
>         popq    %r14
>         popq    %rbp
>         retq
>         .cfi_endproc
>
>
>     .subsections_via_symbols
>
> Not only is this smaller code, it completely eliminates the exception handling table, and completely eliminates a mandatory call into reserve(), which itself is going to have several more branches in it.
>
> Howard
>
> _______________________________________________
> cfe-dev mailing list
> cfe-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-dev



More information about the cfe-dev mailing list