<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - std::swap bad code gen -- alias analysis insufficient to remove dead store"
href="https://bugs.llvm.org/show_bug.cgi?id=42288">42288</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>std::swap bad code gen -- alias analysis insufficient to remove dead store
</td>
</tr>
<tr>
<th>Product</th>
<td>new-bugs
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Linux
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>new bugs
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>david@doublewise.net
</td>
</tr>
<tr>
<th>CC</th>
<td>htmldeveloper@gmail.com, llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>The following code optimizes well for `custom_swap` and `restrict_std_swap`,
but has an additional `mov` instruction for `std_swap`:
void custom_swap(int * lhs, int * rhs) {
int temp = *lhs;
*lhs = *rhs;
*rhs = temp;
}
void restrict_std_swap(int * __restrict lhs, int * __restrict rhs) {
int temp = *lhs;
*lhs = 0;
*lhs = *rhs;
*rhs = temp;
}
void std_swap(int * lhs, int * rhs) {
int temp = *lhs;
*lhs = 0;
*lhs = *rhs;
*rhs = temp;
}
Compiles into this IR with -O1, -O2, -Os, or -O3:
define dso_local void @_Z11custom_swapPiS_(i32* nocapture, i32* nocapture)
local_unnamed_addr #0 !dbg !7 {
call void @llvm.dbg.value(metadata i32* %0, metadata !14, metadata
!DIExpression()), !dbg !17
call void @llvm.dbg.value(metadata i32* %1, metadata !15, metadata
!DIExpression()), !dbg !17
%3 = load i32, i32* %0, align 4, !dbg !18, !tbaa !19
call void @llvm.dbg.value(metadata i32 %3, metadata !16, metadata
!DIExpression()), !dbg !17
%4 = load i32, i32* %1, align 4, !dbg !23, !tbaa !19
store i32 %4, i32* %0, align 4, !dbg !24, !tbaa !19
store i32 %3, i32* %1, align 4, !dbg !25, !tbaa !19
ret void, !dbg !26
}
define dso_local void @_Z17restrict_std_swapPiS_(i32* noalias nocapture, i32*
noalias nocapture) local_unnamed_addr #0 !dbg !27 {
call void @llvm.dbg.value(metadata i32* %0, metadata !32, metadata
!DIExpression()), !dbg !35
call void @llvm.dbg.value(metadata i32* %1, metadata !33, metadata
!DIExpression()), !dbg !35
%3 = load i32, i32* %0, align 4, !dbg !36, !tbaa !19
call void @llvm.dbg.value(metadata i32 %3, metadata !34, metadata
!DIExpression()), !dbg !35
%4 = load i32, i32* %1, align 4, !dbg !37, !tbaa !19
store i32 %4, i32* %0, align 4, !dbg !38, !tbaa !19
store i32 %3, i32* %1, align 4, !dbg !39, !tbaa !19
ret void, !dbg !40
}
define dso_local void @_Z8std_swapPiS_(i32* nocapture, i32* nocapture)
local_unnamed_addr #0 !dbg !41 {
call void @llvm.dbg.value(metadata i32* %0, metadata !43, metadata
!DIExpression()), !dbg !46
call void @llvm.dbg.value(metadata i32* %1, metadata !44, metadata
!DIExpression()), !dbg !46
%3 = load i32, i32* %0, align 4, !dbg !47, !tbaa !19
call void @llvm.dbg.value(metadata i32 %3, metadata !45, metadata
!DIExpression()), !dbg !46
store i32 0, i32* %0, align 4, !dbg !48, !tbaa !19
%4 = load i32, i32* %1, align 4, !dbg !49, !tbaa !19
store i32 %4, i32* %0, align 4, !dbg !50, !tbaa !19
store i32 %3, i32* %1, align 4, !dbg !51, !tbaa !19
ret void, !dbg !52
}
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
attributes #0 = { norecurse nounwind uwtable
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
"less-precise-fpmad"="false" "min-legal-vector-width"="0"
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
"no-jump-tables"="false" "no-nans-fp-math"="false"
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
"stack-protector-buffer-size"="8" "target-cpu"="x86-64"
"target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false"
"use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
As we see from the example that annotates the parameters with __restrict, the
problem appears to be that the risk of *lhs aliasing *rhs disables the
optimizer's ability to remove the dead store in the second line of std_swap. It
is able to see that if they don't alias, the store in line 2 is dead. It is not
able to see that if they do alias, the store in line 3 is dead and the store in
line 2 is dead.
See it live: <a href="https://godbolt.org/z/8nCTnL">https://godbolt.org/z/8nCTnL</a>
The real life problem here is that types that manage a resource but do not
implement a custom std::swap, as well as all types that recursively contain a
type that manages a resource, suffer from reduced performance for using
std::swap. The larger, slightly more meaningful test case showing how I arrived
at this reduction and its relationship to std::swap:
struct unique_ptr {
unique_ptr():
ptr(nullptr)
{
}
unique_ptr(unique_ptr && other) noexcept:
ptr(other.ptr)
{
other.ptr = nullptr;
}
unique_ptr & operator=(unique_ptr && other) noexcept {
delete ptr;
ptr = nullptr;
ptr = other.ptr;
other.ptr = nullptr;
return *this;
}
~unique_ptr() noexcept {
delete ptr;
}
int * ptr;
};
void custom_swap(unique_ptr & lhs, unique_ptr & rhs) noexcept {
int * temp = lhs.ptr;
lhs.ptr = rhs.ptr;
rhs.ptr = temp;
}
void inlined_std_swap(unique_ptr & lhs, unique_ptr & rhs) noexcept {
int * temp_ptr = lhs.ptr;
lhs.ptr = nullptr;
delete lhs.ptr;
lhs.ptr = nullptr;
lhs.ptr = rhs.ptr;
rhs.ptr = nullptr;
delete rhs.ptr;
rhs.ptr = nullptr;
rhs.ptr = temp_ptr;
temp_ptr = nullptr;
delete temp_ptr;
}
void std_swap(unique_ptr & lhs, unique_ptr & rhs) noexcept {
auto temp = static_cast<unique_ptr &&>(lhs);
lhs = static_cast<unique_ptr &&>(rhs);
rhs = static_cast<unique_ptr &&>(temp);
}
See it live: <a href="https://godbolt.org/z/tWjmzo">https://godbolt.org/z/tWjmzo</a></pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>