<html>
<head>
<base href="https://llvm.org/bugs/" />
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW --- - Poor code generation depending on constexprness of function"
href="https://llvm.org/bugs/show_bug.cgi?id=31797">31797</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Poor code generation depending on constexprness of function
</td>
</tr>
<tr>
<th>Product</th>
<td>clang
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>normal
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>C++
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedclangbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>ldionne.2@gmail.com
</td>
</tr>
<tr>
<th>CC</th>
<td>dgregor@apple.com, llvm-bugs@lists.llvm.org
</td>
</tr>
<tr>
<th>Classification</th>
<td>Unclassified
</td>
</tr></table>
<p>
<div>
<pre>Hi,
The following snippet results in very different codegen depending on whether
the `make_pair` function is marked as `constexpr` or not:
#include <utility>
template <typename F, typename S>
// constexpr // <--- TRY UNCOMMENTING THIS, THE DYNAMIC CALL DISAPPEARS
std::pair<F, S> make_pair(F f, S s) { return std::pair<F, S>{f, s}; }
template <typename Pair>
struct map {
explicit constexpr map(Pair p) : pair_(p) { }
Pair pair_;
};
template <typename Pair>
constexpr map<Pair> make_map(Pair pair)
{ return map<Pair>{pair}; }
struct increment_tag { };
using VTable = map<std::pair<increment_tag, void (*)(void*)>>;
template <typename T>
void increment(void* self) { ++*static_cast<T*>(self); }
template <typename T>
static VTable const vtable = make_map(make_pair(increment_tag{},
&increment<T>));
struct any_iterator {
template <typename It>
explicit any_iterator(It it) : vptr_{&vtable<It>}, self_{new It(it)} { }
VTable const* vptr_;
void* self_;
};
int main() {
int array[3] = {0};
any_iterator it{&array[0]};
it.vptr_->pair_.second(it.self_);
}
Without the `constexpr` qualifier, the generated code (-O3) is this:
main: # @main
sub rsp, 24
mov dword ptr [rsp + 16], 0
mov qword ptr [rsp + 8], 0
mov edi, 8
call operator new(unsigned long)
lea rcx, [rsp + 8]
mov qword ptr [rax], rcx
mov rdi, rax
call qword ptr [rip + vtable<int*>+8]
xor eax, eax
add rsp, 24
ret
__cxx_global_var_init: # @__cxx_global_var_init
mov qword ptr [rip + vtable<int*>+8], void increment<int*>(void*)
ret
void increment<int*>(void*): # @void increment<int*>(void*)
add qword ptr [rdi], 4
ret
With the `constexpr` qualifier, the optimizer seems to see through the
initialization of the vtable, and it is able to collapse everything:
main: # @main
xor eax, eax
ret
Same example on Godbolt: <a href="https://godbolt.org/g/dSzAe2">https://godbolt.org/g/dSzAe2</a>
More involved example on Wandbox, where the codegen difference has a
significant performance impact:
<a href="http://melpon.org/wandbox/permlink/06SYXAs9q5D19sxx">http://melpon.org/wandbox/permlink/06SYXAs9q5D19sxx</a></pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>