[LLVMbugs] [Bug 12848] New: constexpr declaration inconsistencies / performance issue

bugzilla-daemon at llvm.org bugzilla-daemon at llvm.org
Wed May 16 05:39:52 PDT 2012


http://llvm.org/bugs/show_bug.cgi?id=12848

             Bug #: 12848
           Summary: constexpr declaration inconsistencies / performance
                    issue
           Product: clang
           Version: trunk
          Platform: PC
        OS/Version: Linux
            Status: NEW
          Severity: normal
          Priority: P
         Component: C++11
        AssignedTo: unassignedclangbugs at nondot.org
        ReportedBy: andyg1001 at hotmail.co.uk
                CC: dgregor at apple.com, llvmbugs at cs.uiuc.edu
    Classification: Unclassified


Consider the following code sample:


#include <stdio.h>

struct S {
    int value;
};

constexpr S fn1() {
    return { 1234 };
}

constexpr int fn2() {
    return { 1234 };
}

template <typename X>
constexpr X fn3() {
    return { 1234 };
}

constexpr S   s1 = fn1();
constexpr int i1 = fn2();
constexpr S   s2 = fn3<S>();
constexpr int i2 = fn3<int>();

int main() {
    constexpr S   s1 = fn1();
    constexpr int i1 = fn2();
    constexpr S   s2 = fn3<S>();
    constexpr int i2 = fn3<int>();

    printf("%i %i %i %i\n", s1.value, i1, s2.value, i2);
    return 0;
}


Under gcc this compiles (correctly, IMHO) without fn1(), fn2() or fn3() being
called or emitted into the binary, since they are used solely within constant
expressions.  Here is the output from "gcc -std=c++0x test.cpp -S":


    .file    "test.cpp"
    .section    .rodata
.LC0:
    .string    "%i %i %i %i\n"
    .text
    .globl    main
    .type    main, @function
main:
.LFB5:
    .cfi_startproc
    pushq    %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movl    $1234, -16(%rbp)
    movl    $1234, -4(%rbp)
    movl    $1234, -32(%rbp)
    movl    $1234, -8(%rbp)
    movl    -32(%rbp), %edx
    movl    -16(%rbp), %eax
    movl    $1234, %r8d
    movl    %edx, %ecx
    movl    $1234, %edx
    movl    %eax, %esi
    movl    $.LC0, %edi
    movl    $0, %eax
    call    printf
    movl    $0, %eax
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE5:
    .size    main, .-main
    .section    .rodata
    .align 4
    .type    _ZL2s1, @object
    .size    _ZL2s1, 4
_ZL2s1:
    .long    1234
    .align 4
    .type    _ZL2i1, @object
    .size    _ZL2i1, 4
_ZL2i1:
    .long    1234
    .align 4
    .type    _ZL2s2, @object
    .size    _ZL2s2, 4
_ZL2s2:
    .long    1234
    .align 4
    .type    _ZL2i2, @object
    .size    _ZL2i2, 4
_ZL2i2:
    .long    1234
    .ident    "GCC: (Debian 4.6.3-1) 4.6.3"
    .section    .note.GNU-stack,"",@progbits


Contrast this with the equivalent output from "clang -std=c++11 test.cpp -S":


    .file    "test.cpp"
    .text
    .globl    main
    .align    16, 0x90
    .type    main,@function
main:                                   # @main
    .cfi_startproc
# BB#0:
    pushq    %rbp
.Ltmp2:
    .cfi_def_cfa_offset 16
.Ltmp3:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
.Ltmp4:
    .cfi_def_cfa_register %rbp
    subq    $32, %rsp
    movl    $0, -4(%rbp)
    callq    _Z3fn2v
    movl    %eax, -8(%rbp)
    callq    _Z3fn3IiET_v
    leaq    .L.str, %rdi
    movl    $1234, %ecx             # imm = 0x4D2
    movl    %eax, -12(%rbp)
    movl    %ecx, %esi
    movl    %ecx, %edx
    movl    %ecx, -16(%rbp)         # 4-byte Spill
    movl    -16(%rbp), %r8d         # 4-byte Reload
    movb    $0, %al
    callq    printf
    movl    $0, %ecx
    movl    %eax, -20(%rbp)         # 4-byte Spill
    movl    %ecx, %eax
    addq    $32, %rsp
    popq    %rbp
    ret
.Ltmp5:
    .size    main, .Ltmp5-main
    .cfi_endproc

    .section    .text._Z3fn2v,"axG",@progbits,_Z3fn2v,comdat
    .weak    _Z3fn2v
    .align    16, 0x90
    .type    _Z3fn2v,@function
_Z3fn2v:                                # @_Z3fn2v
    .cfi_startproc
# BB#0:
    movl    $1234, %eax             # imm = 0x4D2
    ret
.Ltmp6:
    .size    _Z3fn2v, .Ltmp6-_Z3fn2v
    .cfi_endproc

    .section    .text._Z3fn3IiET_v,"axG",@progbits,_Z3fn3IiET_v,comdat
    .weak    _Z3fn3IiET_v
    .align    16, 0x90
    .type    _Z3fn3IiET_v,@function
_Z3fn3IiET_v:                           # @_Z3fn3IiET_v
    .cfi_startproc
# BB#0:
    movl    $1234, %eax             # imm = 0x4D2
    ret
.Ltmp7:
    .size    _Z3fn3IiET_v, .Ltmp7-_Z3fn3IiET_v
    .cfi_endproc

    .type    _ZL2s1,@object          # @_ZL2s1
    .section    .rodata,"a",@progbits
    .align    4
_ZL2s1:
    .long    1234                    # 0x4d2
    .size    _ZL2s1, 4

    .type    _ZL2i1,@object          # @_ZL2i1
    .align    4
_ZL2i1:
    .long    1234                    # 0x4d2
    .size    _ZL2i1, 4

    .type    _ZL2s2,@object          # @_ZL2s2
    .align    4
_ZL2s2:
    .long    1234                    # 0x4d2
    .size    _ZL2s2, 4

    .type    _ZL2i2,@object          # @_ZL2i2
    .align    4
_ZL2i2:
    .long    1234                    # 0x4d2
    .size    _ZL2i2, 4

    .type    _ZZ4mainE2s1,@object    # @_ZZ4mainE2s1
    .align    4
_ZZ4mainE2s1:
    .long    1234                    # 0x4d2
    .size    _ZZ4mainE2s1, 4

    .type    _ZZ4mainE2s2,@object    # @_ZZ4mainE2s2
    .align    4
_ZZ4mainE2s2:
    .long    1234                    # 0x4d2
    .size    _ZZ4mainE2s2, 4

    .type    .L.str,@object          # @.str
    .section    .rodata.str1.1,"aMS",@progbits,1
.L.str:
    .asciz     "%i %i %i %i\n"
    .size    .L.str, 13

    .section    ".note.GNU-stack","",@progbits


With clang, the functions fn2() and fn3<int>() are emitted into the resulting
binary and are called at run time inside main(), even though the results of
those calls are never used (printf is passed the constant 1234 directly).  The
inconsistency is that fn1() and fn3<S>() are treated correctly, as are all of
the global constexpr variable declarations; only the constexpr variables of
type int declared inside main() are not treated correctly.

While the example code above is pretty trivial, I have a more extensive set of
code making use of heavily recursive constexpr functions, and these additional
redundant run-time calls are causing huge performance issues in debug builds
(i.e. builds compiled without optimisation), even though the compiler has
already computed the values at compile time.

-- 
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.



More information about the llvm-bugs mailing list