[cfe-dev] [libc++]x=move(x) behavior

Howard Hinnant via cfe-dev cfe-dev at lists.llvm.org
Thu Oct 1 16:42:42 PDT 2015


On Oct 1, 2015, at 6:28 PM, chen xu via cfe-dev <cfe-dev at lists.llvm.org> wrote:
> 
> Hi, my project code (linux, Clang as compiler and LLVM libcxx as c++ library) will have the execution of x=std::move(x) in some code path (not directly, but sometimes it will happen via levels of function calls and parameter-pass by reference, etc.), and it turns out x is cleared after this x = std::move(x). I tried gnu c++ library, and msvc c++ library (on windows), both keep x untouched. 
> 
> I am wondering if this could be fixed in LLVM libcxx, as it seems making more sense not to clear x when moving x to itself. Thanks. Below is a sample code for repro.  Thanks a lot. 
> 
> #include <iostream>
> using namespace std;
> 
> class Test {
> public:
>     Test() : x("test")  { } 
>     void MoveX() {
>        cout << "before x=move(x), x : " << x;
>        x=move(x);
>        cout << "after x=move(x), x : " << x; 
>     }
> private:
>     string x;
> };
> 
> int main(int argc, char** argv)
> {
>     Test test;
>     test.MoveX();
>     return 0;
> }
> 
> The output would be like 
> 
> before x=move(x), x : test
> after x=move(x), x : 

I would like to make the opposite suggestion.  I think the libc++ string move assignment operator is too slow instead of too fast.

Currently __move_assign(basic_string& __str, true_type) is coded like this:

    template <class _CharT, class _Traits, class _Allocator>
    inline _LIBCPP_INLINE_VISIBILITY
    void
    basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, true_type)
    #if _LIBCPP_STD_VER > 14
        _NOEXCEPT
    #else
        _NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
    #endif
    {
        clear();
        shrink_to_fit();
        __r_.first() = __str.__r_.first();
        __move_assign_alloc(__str);
        __str.__zero();
    }

I think the author of this code was just being lazy (or maybe was too rushed).  Using this test, at -O3:

    void
    test(std::string& s0, std::string&& s1)
    {
        s0 = std::move(s1);
    }

this compiles down to:

        .section	__TEXT,__text,regular,pure_instructions
        .macosx_version_min 10, 10
        .globl	__Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
        .align	4, 0x90
    __Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_: ## @_Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
    Lfunc_begin0:
        .cfi_startproc
        .cfi_personality 155, ___gxx_personality_v0
        .cfi_lsda 16, Lexception0
    ## BB#0:
        pushq	%rbp
    Ltmp3:
        .cfi_def_cfa_offset 16
    Ltmp4:
        .cfi_offset %rbp, -16
        movq	%rsp, %rbp
    Ltmp5:
        .cfi_def_cfa_register %rbp
        pushq	%r14
        pushq	%rbx
    Ltmp6:
        .cfi_offset %rbx, -32
    Ltmp7:
        .cfi_offset %r14, -24
        movq	%rsi, %r14
        movq	%rdi, %rbx
        testb	$1, (%rbx)
        jne	LBB0_1
    ## BB#2:
        movw	$0, (%rbx)
        jmp	LBB0_3
    LBB0_1:
        movq	16(%rbx), %rax
        movb	$0, (%rax)
        movq	$0, 8(%rbx)
    LBB0_3:                                 ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE5clearEv.exit.i.i
    Ltmp0:
        xorl	%esi, %esi
        movq	%rbx, %rdi
        callq	__ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE7reserveEm
    Ltmp1:
    ## BB#4:                                ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEaSEOS5_.exit
        movq	16(%r14), %rax
        movq	%rax, 16(%rbx)
        movq	(%r14), %rax
        movq	8(%r14), %rcx
        movq	%rcx, 8(%rbx)
        movq	%rax, (%rbx)
        movq	$0, 16(%r14)
        movq	$0, 8(%r14)
        movq	$0, (%r14)
        popq	%rbx
        popq	%r14
        popq	%rbp
        retq
    LBB0_5:
    Ltmp2:
        movq	%rax, %rdi
        callq	___clang_call_terminate
    Lfunc_end0:
        .cfi_endproc
        .section	__TEXT,__gcc_except_tab
        .align	2
    GCC_except_table0:
    Lexception0:
        .byte	255                     ## @LPStart Encoding = omit
        .byte	155                     ## @TType Encoding = indirect pcrel sdata4
        .byte	21                      ## @TType base offset
        .byte	3                       ## Call site Encoding = udata4
        .byte	13                      ## Call site table length
    Lset0 = Ltmp0-Lfunc_begin0              ## >> Call Site 1 <<
        .long	Lset0
    Lset1 = Ltmp1-Ltmp0                     ##   Call between Ltmp0 and Ltmp1
        .long	Lset1
    Lset2 = Ltmp2-Lfunc_begin0              ##     jumps to Ltmp2
        .long	Lset2
        .byte	1                       ##   On action: 1
        .byte	1                       ## >> Action Record 1 <<
                                            ##   Catch TypeInfo 1
        .byte	0                       ##   No further actions
                                            ## >> Catch TypeInfos <<
        .long	0                       ## TypeInfo 1
        .align	2

        .section	__TEXT,__textcoal_nt,coalesced,pure_instructions
        .private_extern	___clang_call_terminate
        .globl	___clang_call_terminate
        .weak_def_can_be_hidden	___clang_call_terminate
        .align	4, 0x90
    ___clang_call_terminate:                ## @__clang_call_terminate
    ## BB#0:
        pushq	%rbp
        movq	%rsp, %rbp
        callq	___cxa_begin_catch
        callq	__ZSt9terminatev


    .subsections_via_symbols

which is a mess of code much of which *never* gets executed.  There’s even an exception handling table complete with call to terminate (awful).  This can be vastly improved with just a small change:

    index 6ed4e74..0d07cf7 100644
    --- a/include/string
    +++ b/include/string
    @@ -2453,8 +2453,8 @@ basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, tr
         _NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
     #endif
     {
    -    clear();
    -    shrink_to_fit();
    +    if (__is_long())
    +        __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap());
         __r_.first() = __str.__r_.first();
         __move_assign_alloc(__str);
         __str.__zero();

This is equivalent functionality as what is there today, but compiles down to this instead:

        .section	__TEXT,__text,regular,pure_instructions
        .macosx_version_min 10, 10
        .globl	__Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
        .align	4, 0x90
    __Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_: ## @_Z4testRNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEOS5_
        .cfi_startproc
    ## BB#0:
        pushq	%rbp
    Ltmp0:
        .cfi_def_cfa_offset 16
    Ltmp1:
        .cfi_offset %rbp, -16
        movq	%rsp, %rbp
    Ltmp2:
        .cfi_def_cfa_register %rbp
        pushq	%r14
        pushq	%rbx
    Ltmp3:
        .cfi_offset %rbx, -32
    Ltmp4:
        .cfi_offset %r14, -24
        movq	%rsi, %rbx
        movq	%rdi, %r14
        testb	$1, (%r14)
        je	LBB0_2
    ## BB#1:
        movq	16(%r14), %rdi
        callq	__ZdlPv
    LBB0_2:                                 ## %_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEaSEOS5_.exit
        movq	16(%rbx), %rax
        movq	%rax, 16(%r14)
        movq	(%rbx), %rax
        movq	8(%rbx), %rcx
        movq	%rcx, 8(%r14)
        movq	%rax, (%r14)
        movq	$0, 16(%rbx)
        movq	$0, 8(%rbx)
        movq	$0, (%rbx)
        popq	%rbx
        popq	%r14
        popq	%rbp
        retq
        .cfi_endproc


    .subsections_via_symbols

Not only is this smaller code, it completely eliminates the exception handling table, and completely eliminates a mandatory call into reserve(), which itself is going to have several more branches in it.

Howard




More information about the cfe-dev mailing list