[cfe-users] Constexpr prevents optimization?

Fri Feb 16 01:51:26 PST 2018

Dear all,

I was just playing around with a toy example when I noticed an oddity in
the code generated by clang-5.0.0 (and also in clang-5.0.1) regarding
constexpr.

Given the code:
> int fib(int i) { if (i <= 0) return i; else return (fib(i - 1) + fib(i - 2)) % 100; }
> int main()
> {
>     int ret = 0;
>     for (int i = 0; i < 10; ++i)
>         ret += fib(39);
>     return ret;
> }

Compile it with clang++ -O3 and what you get is (gdb disassembly of "main"):
> 7       {
> 8           int ret = 0;
> 9           for (int i = 0; i < 10; ++i)
> 10              ret += fib(39);
>    0x00000000004004e0 <+0>:     push   rax
>    0x00000000004004e1 <+1>:     mov    edi,0x27
>    0x00000000004004e6 <+6>:     call   0x400490 <fib(int)>
>
> 9           for (int i = 0; i < 10; ++i)
>    0x00000000004004eb <+11>:    add    eax,eax
>    0x00000000004004ed <+13>:    lea    eax,[rax+rax*4]
>
> 11          return ret;
>    0x00000000004004f0 <+16>:    pop    rcx
>    0x00000000004004f1 <+17>:    ret

That is a single call to fib(39), followed by a multiplication by 10
("add eax,eax" doubles the result and "lea eax,[rax+rax*4]" then
multiplies it by 5).

Now, if you make "fib" constexpr, i.e.:
> constexpr int fib(int i) { if (i <= 0) return i; else return (fib(i - 1) + fib(i - 2)) % 100; }

And, again, compile it with -O3 and disassemble "main":
> 7       {
> 8           int ret = 0;
> 9           for (int i = 0; i < 10; ++i)
> 10              ret += fib(39);
>    0x0000000000400490 <+0>:     push   rbp
>    0x0000000000400491 <+1>:     push   rbx
>    0x0000000000400492 <+2>:     push   rax
>    0x0000000000400493 <+3>:     mov    edi,0x27
>    0x0000000000400498 <+8>:     call   0x400530 <fib(int)>
>    0x000000000040049d <+13>:    mov    ebx,eax
>    0x000000000040049f <+15>:    mov    edi,0x27
>    0x00000000004004a4 <+20>:    call   0x400530 <fib(int)>
>    0x00000000004004a9 <+25>:    mov    ebp,eax
>    0x00000000004004ab <+27>:    add    ebp,ebx
>    0x00000000004004ad <+29>:    mov    edi,0x27
>    0x00000000004004b2 <+34>:    call   0x400530 <fib(int)>
>    0x00000000004004b7 <+39>:    mov    ebx,eax
>    0x00000000004004b9 <+41>:    add    ebx,ebp
>    0x00000000004004bb <+43>:    mov    edi,0x27
>    0x00000000004004c0 <+48>:    call   0x400530 <fib(int)>
>    0x00000000004004c5 <+53>:    mov    ebp,eax
>    0x00000000004004c7 <+55>:    add    ebp,ebx
>    0x00000000004004c9 <+57>:    mov    edi,0x27
>    0x00000000004004ce <+62>:    call   0x400530 <fib(int)>
>    0x00000000004004d3 <+67>:    mov    ebx,eax
>    0x00000000004004d5 <+69>:    add    ebx,ebp
>    0x00000000004004d7 <+71>:    mov    edi,0x27
>    0x00000000004004dc <+76>:    call   0x400530 <fib(int)>
>    0x00000000004004e1 <+81>:    mov    ebp,eax
>    0x00000000004004e3 <+83>:    add    ebp,ebx
>    0x00000000004004e5 <+85>:    mov    edi,0x27
>    0x00000000004004ea <+90>:    call   0x400530 <fib(int)>
>    0x00000000004004ef <+95>:    mov    ebx,eax
>    0x00000000004004f1 <+97>:    add    ebx,ebp
>    0x00000000004004f3 <+99>:    mov    edi,0x27
>    0x00000000004004f8 <+104>:   call   0x400530 <fib(int)>
>    0x00000000004004fd <+109>:   mov    ebp,eax
>    0x00000000004004ff <+111>:   add    ebp,ebx
>    0x0000000000400501 <+113>:   mov    edi,0x27
>    0x0000000000400506 <+118>:   call   0x400530 <fib(int)>
>    0x000000000040050b <+123>:   mov    ebx,eax
>    0x000000000040050d <+125>:   add    ebx,ebp
>    0x000000000040050f <+127>:   mov    edi,0x27
>    0x0000000000400514 <+132>:   call   0x400530 <fib(int)>
>    0x0000000000400519 <+137>:   add    eax,ebx
>
> 11          return ret;
>    0x000000000040051b <+139>:   add    rsp,0x8
>    0x000000000040051f <+143>:   pop    rbx
>    0x0000000000400520 <+144>:   pop    rbp
>    0x0000000000400521 <+145>:   ret

That's 10 separate calls to the function "fib" (whose own assembly is
essentially the same as in the non-constexpr case), with the results
added up one by one.

Regardless of whether the function is evaluated at compile time or not,
it seems odd to me that using constexpr here prevents clang from
emitting the very same code as in the non-constexpr example. Note,
however, that if you declare "fib" to be "static constexpr", clang
again emits the multiplication code.

Is there something keeping clang from producing the multiplication code
for a non-static constexpr example that I don't see? And why is the
optimization possible again if one makes "fib" static?

Greetings,
Steffen