[llvm] Object: Don't error out on malformed bitcode files. (PR #96848)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 9 15:40:41 PDT 2024
https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/96848
>From 8c96c01d38d69ac5dbc25db6b58a662541f838ef Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Wed, 26 Jun 2024 21:44:28 -0700
Subject: [PATCH 1/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.6-beta.1
---
llvm/lib/Object/ArchiveWriter.cpp | 39 +++++++++++++++++--
.../test/Object/archive-malformed-object.test | 26 ++++++++++---
2 files changed, 56 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 913b74c110b36..c6d443ff9d15a 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -482,7 +482,8 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind,
}
static Expected<std::unique_ptr<SymbolicFile>>
-getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) {
+getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context,
+ object::Archive::Kind Kind) {
const file_magic Type = identify_magic(Buf.getBuffer());
// Don't attempt to read non-symbolic file types.
if (!object::SymbolicFile::isSymbolicFile(Type, &Context))
@@ -490,8 +491,38 @@ getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) {
if (Type == file_magic::bitcode) {
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
Buf, file_magic::bitcode, &Context);
- if (!ObjOrErr)
- return ObjOrErr.takeError();
+ // An error reading a bitcode file most likely indicates that the file
+ // was created by a compiler from the future. Normally we don't try to
+ // implement forwards compatibility for bitcode files, but when creating an
+ // archive we can implement best-effort forwards compatibility by treating
+ // the file as a blob and not creating symbol index entries for it. lld and
+ // mold ignore the archive symbol index, so provided that you use one of
+ // these linkers, LTO will work as long as lld or the gold plugin is newer
+ // than the compiler. We only ignore errors if the archive format is one
+ // that is supported by a linker that is known to ignore the index,
+ // otherwise there's no chance of this working so we may as well error out.
+ // We print a warning on read failure so that users of linkers that rely on
+ // the symbol index can diagnose the issue.
+ //
+ // This is the same behavior as GNU ar when the linker plugin returns an
+ // error when reading the input file. If the bitcode file is actually
+ // malformed, it will be diagnosed at link time.
+ if (!ObjOrErr) {
+ switch (Kind) {
+ case object::Archive::K_BSD:
+ case object::Archive::K_GNU:
+ case object::Archive::K_GNU64:
+ llvm::logAllUnhandledErrors(ObjOrErr.takeError(), llvm::errs(),
+ "warning: " + Buf.getBufferIdentifier() +
+ ": ");
+ return nullptr;
+ case object::Archive::K_AIXBIG:
+ case object::Archive::K_COFF:
+ case object::Archive::K_DARWIN:
+ case object::Archive::K_DARWIN64:
+ return ObjOrErr.takeError();
+ }
+ }
return std::move(*ObjOrErr);
} else {
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
@@ -820,7 +851,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) {
for (const NewArchiveMember &M : NewMembers) {
Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr =
- getSymbolicFile(M.Buf->getMemBufferRef(), Context);
+ getSymbolicFile(M.Buf->getMemBufferRef(), Context, Kind);
if (!SymFileOrErr)
return createFileError(M.MemberName, SymFileOrErr.takeError());
SymFiles.push_back(std::move(*SymFileOrErr));
diff --git a/llvm/test/Object/archive-malformed-object.test b/llvm/test/Object/archive-malformed-object.test
index a92762975bda6..7492dc513492e 100644
--- a/llvm/test/Object/archive-malformed-object.test
+++ b/llvm/test/Object/archive-malformed-object.test
@@ -1,5 +1,6 @@
## Show that the archive library emits error messages when adding malformed
-## objects.
+## object files and skips symbol tables for "malformed" bitcode files, which
+## are assumed to be bitcode files generated by compilers from the future.
# RUN: rm -rf %t.dir
# RUN: split-file %s %t.dir
@@ -9,19 +10,28 @@
# RUN: llvm-as input.ll -o input.bc
# RUN: cp input.bc good.bc
# RUN: %python -c "with open('input.bc', 'a') as f: f.truncate(10)"
-# RUN: not llvm-ar rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+# RUN: llvm-ar rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
+
+# llvm-nm will fail when it tries to read the malformed bitcode file, but
+# it's supposed to print the archive map first, which in this case it
+# won't because there won't be one.
+# RUN: not llvm-nm --print-armap bad.a | count 0
## Malformed bitcode object is the last file member of archive if the symbol table is required.
# RUN: rm -rf bad.a
-# RUN: not llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+# RUN: llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
+# RUN: not llvm-nm --print-armap bad.a | FileCheck %s --check-prefix=ARMAP
## Malformed bitcode object if the symbol table is not required for big archive.
+## For big archives we print an error instead of a warning because the AIX linker
+## presumably requires the index.
# RUN: rm -rf bad.a
# RUN: not llvm-ar --format=bigarchive rcS bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
# RUN: rm -rf bad.a
# RUN: not llvm-ar --format=bigarchive rcS bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
# ERR1: error: bad.a: 'input.bc': Invalid bitcode signature
+# WARN1: warning: input.bc: Invalid bitcode signature
## Non-bitcode malformed file.
# RUN: yaml2obj input.yaml -o input.o
@@ -29,17 +39,23 @@
# ERR2: error: bad.a: 'input.o': section header table goes past the end of the file: e_shoff = 0x9999
-## Don't emit an error if the symbol table is not required for formats other than the big archive format.
-# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc
+## Don't emit an error or warning if the symbol table is not required for formats other than the big archive format.
+# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc 2>&1 | count 0
# RUN: llvm-ar t good.a | FileCheck %s --check-prefix=CONTENTS
# CONTENTS: input.o
# CONTENTS-NEXT: input.bc
+# ARMAP: Archive map
+# ARMAP-NEXT: foo in good.bc
+# ARMAP-EMPTY:
+
#--- input.ll
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux"
+ at foo = global i32 1
+
#--- input.yaml
--- !ELF
FileHeader:
>From dac260c18c5bdae3d560f663c73af673a9ca853a Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Tue, 9 Jul 2024 15:27:42 -0700
Subject: [PATCH 2/3] Improve tests, change API
Created using spr 1.3.6-beta.1
---
llvm/include/llvm/Object/ArchiveWriter.h | 8 +++--
llvm/lib/Object/ArchiveWriter.cpp | 36 +++++++++++--------
.../test/Object/archive-malformed-object.test | 15 +++++++-
3 files changed, 42 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h
index a19f8fcc79d74..f965a12c45267 100644
--- a/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/llvm/include/llvm/Object/ArchiveWriter.h
@@ -48,18 +48,22 @@ enum class SymtabWritingMode {
BigArchive64 // Only write the 64-bit symbol table.
};
+void warnToStderr(Error Err);
+
Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr,
- std::optional<bool> IsEC = std::nullopt);
+ std::optional<bool> IsEC = std::nullopt,
+ function_ref<void(Error)> Warn = warnToStderr);
// writeArchiveToBuffer is similar to writeArchive but returns the Archive in a
// buffer instead of writing it out to a file.
Expected<std::unique_ptr<MemoryBuffer>>
writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers,
SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
- bool Deterministic, bool Thin);
+ bool Deterministic, bool Thin,
+ function_ref<void(Error)> Warn = warnToStderr);
}
#endif
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index c6d443ff9d15a..1c75268322257 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -483,7 +483,7 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind,
static Expected<std::unique_ptr<SymbolicFile>>
getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context,
- object::Archive::Kind Kind) {
+ object::Archive::Kind Kind, function_ref<void(Error)> Warn) {
const file_magic Type = identify_magic(Buf.getBuffer());
// Don't attempt to read non-symbolic file types.
if (!object::SymbolicFile::isSymbolicFile(Type, &Context))
@@ -512,9 +512,7 @@ getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context,
case object::Archive::K_BSD:
case object::Archive::K_GNU:
case object::Archive::K_GNU64:
- llvm::logAllUnhandledErrors(ObjOrErr.takeError(), llvm::errs(),
- "warning: " + Buf.getBufferIdentifier() +
- ": ");
+ Warn(ObjOrErr.takeError());
return nullptr;
case object::Archive::K_AIXBIG:
case object::Archive::K_COFF:
@@ -782,7 +780,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
object::Archive::Kind Kind, bool Thin, bool Deterministic,
SymtabWritingMode NeedSymbols, SymMap *SymMap,
LLVMContext &Context, ArrayRef<NewArchiveMember> NewMembers,
- std::optional<bool> IsEC) {
+ std::optional<bool> IsEC, function_ref<void(Error)> Warn) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
uint64_t MemHeadPadSize = 0;
uint64_t Pos =
@@ -850,8 +848,10 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) {
for (const NewArchiveMember &M : NewMembers) {
- Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr =
- getSymbolicFile(M.Buf->getMemBufferRef(), Context, Kind);
+ Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr = getSymbolicFile(
+ M.Buf->getMemBufferRef(), Context, Kind, [&](Error Err) {
+ Warn(createFileError(M.MemberName, std::move(Err)));
+ });
if (!SymFileOrErr)
return createFileError(M.MemberName, SymFileOrErr.takeError());
SymFiles.push_back(std::move(*SymFileOrErr));
@@ -1031,7 +1031,8 @@ Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
static Error
writeArchiveToStream(raw_ostream &Out, ArrayRef<NewArchiveMember> NewMembers,
SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
- bool Deterministic, bool Thin, std::optional<bool> IsEC) {
+ bool Deterministic, bool Thin, std::optional<bool> IsEC,
+ function_ref<void(Error)> Warn) {
assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
SmallString<0> SymNamesBuf;
@@ -1053,7 +1054,7 @@ writeArchiveToStream(raw_ostream &Out, ArrayRef<NewArchiveMember> NewMembers,
Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab,
- isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers, IsEC);
+ isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers, IsEC, Warn);
if (Error E = DataOrErr.takeError())
return E;
std::vector<MemberData> &Data = *DataOrErr;
@@ -1296,11 +1297,16 @@ writeArchiveToStream(raw_ostream &Out, ArrayRef<NewArchiveMember> NewMembers,
return Error::success();
}
+void warnToStderr(Error Err) {
+ llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "warning: ");
+}
+
Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
std::unique_ptr<MemoryBuffer> OldArchiveBuf,
- std::optional<bool> IsEC) {
+ std::optional<bool> IsEC,
+ function_ref<void(Error)> Warn) {
Expected<sys::fs::TempFile> Temp =
sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a");
if (!Temp)
@@ -1308,7 +1314,7 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
raw_fd_ostream Out(Temp->FD, false);
if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind,
- Deterministic, Thin, IsEC)) {
+ Deterministic, Thin, IsEC, Warn)) {
if (Error DiscardError = Temp->discard())
return joinErrors(std::move(E), std::move(DiscardError));
return E;
@@ -1332,12 +1338,14 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
Expected<std::unique_ptr<MemoryBuffer>>
writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers,
SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
- bool Deterministic, bool Thin) {
+ bool Deterministic, bool Thin,
+ function_ref<void(Error)> Warn) {
SmallVector<char, 0> ArchiveBufferVector;
raw_svector_ostream ArchiveStream(ArchiveBufferVector);
- if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab,
- Kind, Deterministic, Thin, std::nullopt))
+ if (Error E =
+ writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab, Kind,
+ Deterministic, Thin, std::nullopt, Warn))
return std::move(E);
return std::make_unique<SmallVectorMemoryBuffer>(
diff --git a/llvm/test/Object/archive-malformed-object.test b/llvm/test/Object/archive-malformed-object.test
index 7492dc513492e..147fcdf1e68c2 100644
--- a/llvm/test/Object/archive-malformed-object.test
+++ b/llvm/test/Object/archive-malformed-object.test
@@ -22,6 +22,19 @@
# RUN: llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
# RUN: not llvm-nm --print-armap bad.a | FileCheck %s --check-prefix=ARMAP
+## Malformed bitcode object either warn or error depending on the archive format
+## (see switch in getSymbolicFile).
+# RUN: rm -rf bad.a
+# RUN: llvm-ar --format=bsd rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
+# RUN: rm -rf bad.a
+# RUN: llvm-ar --format=gnu rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
+# RUN: rm -rf bad.a
+# RUN: not llvm-ar --format=bigarchive rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+# RUN: rm -rf bad.a
+# RUN: not llvm-ar --format=coff rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+# RUN: rm -rf bad.a
+# RUN: not llvm-ar --format=darwin rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+
## Malformed bitcode object if the symbol table is not required for big archive.
## For big archives we print an error instead of a warning because the AIX linker
## presumably requires the index.
@@ -31,7 +44,7 @@
# RUN: not llvm-ar --format=bigarchive rcS bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
# ERR1: error: bad.a: 'input.bc': Invalid bitcode signature
-# WARN1: warning: input.bc: Invalid bitcode signature
+# WARN1: warning: 'input.bc': Invalid bitcode signature
## Non-bitcode malformed file.
# RUN: yaml2obj input.yaml -o input.o
>From df902096d13c57840883c304c256eedda413993d Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter at pcc.me.uk>
Date: Tue, 9 Jul 2024 15:40:27 -0700
Subject: [PATCH 3/3] Address remaining review comments, clang-format
Created using spr 1.3.6-beta.1
---
llvm/lib/Object/ArchiveWriter.cpp | 3 +--
llvm/test/Object/archive-malformed-object.test | 6 +++---
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 1c75268322257..361c512302c29 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -1305,8 +1305,7 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
std::unique_ptr<MemoryBuffer> OldArchiveBuf,
- std::optional<bool> IsEC,
- function_ref<void(Error)> Warn) {
+ std::optional<bool> IsEC, function_ref<void(Error)> Warn) {
Expected<sys::fs::TempFile> Temp =
sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a");
if (!Temp)
diff --git a/llvm/test/Object/archive-malformed-object.test b/llvm/test/Object/archive-malformed-object.test
index 147fcdf1e68c2..9d0d4dbd0ad0c 100644
--- a/llvm/test/Object/archive-malformed-object.test
+++ b/llvm/test/Object/archive-malformed-object.test
@@ -12,9 +12,9 @@
# RUN: %python -c "with open('input.bc', 'a') as f: f.truncate(10)"
# RUN: llvm-ar rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
-# llvm-nm will fail when it tries to read the malformed bitcode file, but
-# it's supposed to print the archive map first, which in this case it
-# won't because there won't be one.
+## llvm-nm will fail when it tries to read the malformed bitcode file, but
+## it's supposed to print the archive map first, which in this case it
+## won't because there won't be one.
# RUN: not llvm-nm --print-armap bad.a | count 0
## Malformed bitcode object is the last file member of archive if the symbol table is required.
More information about the llvm-commits
mailing list