[llvm-bugs] [Bug 52412] New: Miscompilation with LTO + -Oz
via llvm-bugs
llvm-bugs at lists.llvm.org
Thu Nov 4 17:44:40 PDT 2021
https://bugs.llvm.org/show_bug.cgi?id=52412
Bug ID: 52412
Summary: Miscompilation with LTO + -Oz
Product: libraries
Version: 13.0
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Interprocedural Optimizations
Assignee: unassignedbugs at nondot.org
Reporter: mh+llvm at glandium.org
CC: llvm-bugs at lists.llvm.org, nikita.ppv at gmail.com,
smeenai at fb.com
Created attachment 25422
--> https://bugs.llvm.org/attachment.cgi?id=25422&action=edit
testcase.cpp
The following code (reduced from the attached code, which is itself reduced
from the fully preprocessed code extracted from a Firefox build, but is more
realistic than the reduced version below) is miscompiled (note: this was
originally found on x86_64 android, but it seems to happen on at least x86_64
and aarch64 linux):
```
typedef long unsigned int size_t;
extern "C" {
void *malloc(size_t);
void free(void *);
}
void *operator new(size_t, void *p) { return p; }
void *operator new[](size_t size) noexcept(false) { return malloc(size); }
void operator delete[](void *ptr) noexcept(true) { return free(ptr); }
template <typename T> class UniquePtr;
template <typename T> class UniquePtr<T[]> {
typedef T *Pointer;
Pointer mPtr;
public:
explicit UniquePtr(Pointer aPtr) : mPtr(aPtr) {}
UniquePtr(UniquePtr &&aOther) : mPtr(aOther.release()) {}
~UniquePtr() {
Pointer old = mPtr;
mPtr = nullptr;
if (old != nullptr) {
delete[] old;
}
}
Pointer get() const { return mPtr; }
Pointer release() {
Pointer p = mPtr;
mPtr = nullptr;
return p;
}
};
template <typename T> struct Vector {
__attribute__((noinline)) bool growStorageBy(size_t aIncr) {
T *newbuf = static_cast<T *>(malloc(sizeof(T)));
if (!newbuf)
return false;
mBegin = newbuf;
return true;
}
T *mBegin;
size_t mLength;
size_t mCapacity;
bool append(T &&aU) {
if (mLength == mCapacity) {
if (!growStorageBy(1))
return false;
}
new (mBegin + mLength) T(static_cast<T &&>(aU));
++mLength;
return true;
}
};
struct ChunkedJSONWriteFunc {
void AllocChunk(size_t aChunkSize);
char *mChunkPtr;
Vector<UniquePtr<char[]>> mChunkList;
};
void ChunkedJSONWriteFunc::AllocChunk(size_t aChunkSize) {
UniquePtr<char[]> newChunk = UniquePtr<char[]>(new char[aChunkSize]);
mChunkPtr = newChunk.get();
mChunkList.append(static_cast<UniquePtr<char[]> &&>(newChunk));
}
```
when compiled with:
```
clang++ -shared -std=gnu++17 --target=x86_64-linux-gnu -o testcase.so \
  -flto=thin -fPIC -Oz testcase.cpp -nostdlib -fuse-ld=lld
```
The resulting machine code for AllocChunk looks like:
```
00000000000014c2 <_ZN20ChunkedJSONWriteFunc10AllocChunkEm>:
14c2: 41 56 push %r14
14c4: 53 push %rbx
14c5: 50 push %rax
14c6: 48 89 fb mov %rdi,%rbx
14c9: 48 89 f7 mov %rsi,%rdi
14cc: e8 4f 00 00 00 callq 1520 <malloc at plt>
14d1: 49 89 c6 mov %rax,%r14
14d4: 48 89 03 mov %rax,(%rbx)
14d7: 48 83 c3 08 add $0x8,%rbx
14db: 48 89 df mov %rbx,%rdi
14de: e8 0f 00 00 00 callq 14f2 <_ZN6VectorI9UniquePtrIA_cEE13growStorageByEm>
14e3: 4c 89 f7 mov %r14,%rdi
14e6: 48 83 c4 08 add $0x8,%rsp
14ea: 5b pop %rbx
14eb: 41 5e pop %r14
14ed: e9 3e 00 00 00 jmpq 1530 <free at plt>
```
The code is compiled as if growStorageBy always returns false, in which case the
UniquePtr is not released and is thus freed.
When the `__attribute__((noinline))` is removed from growStorageBy, this goes
even further, making the function literally empty, placed at the end of .text
and before .plt, without a ret, so when called, we'd fall through to the first
entry in the plt.
The less reduced testcase requires that the .o is compiled with -Oz and the
linker uses -O2, so you'd need to split the commands.
Looking at the IR between passes, it seems things go awry after the
InlinerPass. This is what the IR for AllocChunk looks like after inlining
append:
```
*** IR Dump After InlinerPass on (_ZN20ChunkedJSONWriteFunc10AllocChunkEm) ***
; Function Attrs: minsize optsize uwtable
define void
@_ZN20ChunkedJSONWriteFunc10AllocChunkEm(%struct.ChunkedJSONWriteFunc* nonnull
align 8 dereferenceable(32) %0, i64 %1) local_unnamed_addr #5 align 2
personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
%3 = alloca %class.UniquePtr, align 8
%4 = bitcast %class.UniquePtr* %3 to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %4) #8
%5 = tail call noalias nonnull align 16 i8* @malloc(i64 %1) #9
%6 = getelementptr inbounds %class.UniquePtr, %class.UniquePtr* %3, i64 0,
i32 0
store i8* %5, i8** %6, align 8, !tbaa !5
%7 = getelementptr inbounds %struct.ChunkedJSONWriteFunc,
%struct.ChunkedJSONWriteFunc* %0, i64 0, i32 0
store i8* %5, i8** %7, align 8, !tbaa !10
%8 = getelementptr inbounds %struct.ChunkedJSONWriteFunc,
%struct.ChunkedJSONWriteFunc* %0, i64 0, i32 1
%9 = getelementptr inbounds %struct.Vector, %struct.Vector* %8, i64 0, i32 1
%10 = load i64, i64* %9, align 8, !tbaa !14
%11 = getelementptr inbounds %struct.Vector, %struct.Vector* %8, i64 0, i32 2
%12 = load i64, i64* %11, align 8, !tbaa !15
%13 = icmp eq i64 %10, %12
br i1 %13, label %14, label %18
14: ; preds = %2
%15 = call fastcc zeroext i1
@_ZN6VectorI9UniquePtrIA_cEE13growStorageByEm(%struct.Vector* nonnull align 8
dereferenceable(24) %8) #9
br i1 %15, label %16, label %26
16: ; preds = %14
%17 = load i64, i64* %9, align 8, !tbaa !14
br label %18
18: ; preds = %16, %2
%19 = phi i64 [ %17, %16 ], [ %10, %2 ]
%20 = getelementptr inbounds %struct.Vector, %struct.Vector* %8, i64 0, i32 0
%21 = load %class.UniquePtr*, %class.UniquePtr** %20, align 8, !tbaa !16
%22 = getelementptr inbounds %class.UniquePtr, %class.UniquePtr* %21, i64
%19, i32 0
%23 = getelementptr inbounds %class.UniquePtr, %class.UniquePtr* %3, i64 0,
i32 0
%24 = load i8*, i8** %23, align 8, !tbaa !5
store i8* null, i8** %23, align 8, !tbaa !5
store i8* %24, i8** %22, align 8, !tbaa !5
%25 = add i64 %19, 1
store i64 %25, i64* %9, align 8, !tbaa !14
br label %26
26: ; preds = %14, %18
br label %27
27: ; preds = %26
%28 = load i8*, i8** %6, align 8, !tbaa !5
store i8* null, i8** %6, align 8, !tbaa !5
call void @free(i8* nonnull %28) #9
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4) #8
ret void
29: ; No predecessors!
%30 = landingpad { i8*, i32 }
cleanup
%31 = load i8*, i8** %6, align 8, !tbaa !5
store i8* null, i8** %6, align 8, !tbaa !5
call void @free(i8* nonnull %31) #9
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4) #8
resume { i8*, i32 } %30
}
```
Note how the UniquePtr is released after the pointer is read for the call to
free in the basic block at label #27.
This appears to be fixed on trunk, via https://reviews.llvm.org/D111515, but
I'd rather have someone look at whether this was fixed by chance, or if the
root cause of this miscompilation was, indeed, fixed there.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20211105/82d89312/attachment-0001.html>
More information about the llvm-bugs
mailing list