[llvm] c675a9b - Object: Don't error out on malformed bitcode files.

via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 18 16:05:56 PDT 2024


Author: pcc
Date: 2024-07-18T16:05:53-07:00
New Revision: c675a9be63b67682477e5cbdc01c450f66bbc59a

URL: https://github.com/llvm/llvm-project/commit/c675a9be63b67682477e5cbdc01c450f66bbc59a
DIFF: https://github.com/llvm/llvm-project/commit/c675a9be63b67682477e5cbdc01c450f66bbc59a.diff

LOG: Object: Don't error out on malformed bitcode files.

An error reading a bitcode file most likely indicates that the file
was created by a compiler from the future. Normally we don't try to
implement forwards compatibility for bitcode files, but when creating
an archive we can implement best-effort forwards compatibility by
treating the file as a blob and not creating symbol index entries for
it. lld and mold ignore the archive symbol index, so provided that
you use one of these linkers, LTO will work as long as lld or the
gold plugin is newer than the compiler. We only ignore errors if the
archive format is one that is supported by a linker that is known to
ignore the index, otherwise there's no chance of this working so we
may as well error out. We print a warning on read failure so that
users of linkers that rely on the symbol index can diagnose the issue.

This is the same behavior as GNU ar when the linker plugin returns
an error when reading the input file. If the bitcode file is actually
malformed, it will be diagnosed at link time.

Reviewers: MaskRay, dwblaikie, jh7370

Reviewed By: MaskRay, dwblaikie, jh7370

Pull Request: https://github.com/llvm/llvm-project/pull/96848

Added: 
    

Modified: 
    llvm/include/llvm/Object/ArchiveWriter.h
    llvm/lib/Object/ArchiveWriter.cpp
    llvm/test/Object/archive-malformed-object.test

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h
index e41b3d51173d4..95b81f5f64c8f 100644
--- a/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/llvm/include/llvm/Object/ArchiveWriter.h
@@ -48,25 +48,30 @@ enum class SymtabWritingMode {
   BigArchive64  // Only write the 64-bit symbol table.
 };
 
+void warnToStderr(Error Err);
+
 // Write an archive directly to an output stream.
 Error writeArchiveToStream(raw_ostream &Out,
                            ArrayRef<NewArchiveMember> NewMembers,
                            SymtabWritingMode WriteSymtab,
                            object::Archive::Kind Kind, bool Deterministic,
-                           bool Thin, std::optional<bool> IsEC = std::nullopt);
+                           bool Thin, std::optional<bool> IsEC = std::nullopt,
+                           function_ref<void(Error)> Warn = warnToStderr);
 
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
                    bool Deterministic, bool Thin,
                    std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr,
-                   std::optional<bool> IsEC = std::nullopt);
+                   std::optional<bool> IsEC = std::nullopt,
+                   function_ref<void(Error)> Warn = warnToStderr);
 
 // writeArchiveToBuffer is similar to writeArchive but returns the Archive in a
 // buffer instead of writing it out to a file.
 Expected<std::unique_ptr<MemoryBuffer>>
 writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers,
                      SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
-                     bool Deterministic, bool Thin);
+                     bool Deterministic, bool Thin,
+                     function_ref<void(Error)> Warn = warnToStderr);
 }
 
 #endif

diff  --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 34f12cf0111cf..114045561366d 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -482,7 +482,8 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind,
 }
 
 static Expected<std::unique_ptr<SymbolicFile>>
-getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) {
+getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context,
+                object::Archive::Kind Kind, function_ref<void(Error)> Warn) {
   const file_magic Type = identify_magic(Buf.getBuffer());
   // Don't attempt to read non-symbolic file types.
   if (!object::SymbolicFile::isSymbolicFile(Type, &Context))
@@ -490,8 +491,36 @@ getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) {
   if (Type == file_magic::bitcode) {
     auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
         Buf, file_magic::bitcode, &Context);
-    if (!ObjOrErr)
-      return ObjOrErr.takeError();
+    // An error reading a bitcode file most likely indicates that the file
+    // was created by a compiler from the future. Normally we don't try to
+    // implement forwards compatibility for bitcode files, but when creating an
+    // archive we can implement best-effort forwards compatibility by treating
+    // the file as a blob and not creating symbol index entries for it. lld and
+    // mold ignore the archive symbol index, so provided that you use one of
+    // these linkers, LTO will work as long as lld or the gold plugin is newer
+    // than the compiler. We only ignore errors if the archive format is one
+    // that is supported by a linker that is known to ignore the index,
+    // otherwise there's no chance of this working so we may as well error out.
+    // We print a warning on read failure so that users of linkers that rely on
+    // the symbol index can diagnose the issue.
+    //
+    // This is the same behavior as GNU ar when the linker plugin returns an
+    // error when reading the input file. If the bitcode file is actually
+    // malformed, it will be diagnosed at link time.
+    if (!ObjOrErr) {
+      switch (Kind) {
+      case object::Archive::K_BSD:
+      case object::Archive::K_GNU:
+      case object::Archive::K_GNU64:
+        Warn(ObjOrErr.takeError());
+        return nullptr;
+      case object::Archive::K_AIXBIG:
+      case object::Archive::K_COFF:
+      case object::Archive::K_DARWIN:
+      case object::Archive::K_DARWIN64:
+        return ObjOrErr.takeError();
+      }
+    }
     return std::move(*ObjOrErr);
   } else {
     auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
@@ -751,7 +780,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
                   object::Archive::Kind Kind, bool Thin, bool Deterministic,
                   SymtabWritingMode NeedSymbols, SymMap *SymMap,
                   LLVMContext &Context, ArrayRef<NewArchiveMember> NewMembers,
-                  std::optional<bool> IsEC) {
+                  std::optional<bool> IsEC, function_ref<void(Error)> Warn) {
   static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
   uint64_t MemHeadPadSize = 0;
   uint64_t Pos =
@@ -819,8 +848,10 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
 
   if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) {
     for (const NewArchiveMember &M : NewMembers) {
-      Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr =
-          getSymbolicFile(M.Buf->getMemBufferRef(), Context);
+      Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr = getSymbolicFile(
+          M.Buf->getMemBufferRef(), Context, Kind, [&](Error Err) {
+            Warn(createFileError(M.MemberName, std::move(Err)));
+          });
       if (!SymFileOrErr)
         return createFileError(M.MemberName, SymFileOrErr.takeError());
       SymFiles.push_back(std::move(*SymFileOrErr));
@@ -1001,7 +1032,8 @@ Error writeArchiveToStream(raw_ostream &Out,
                            ArrayRef<NewArchiveMember> NewMembers,
                            SymtabWritingMode WriteSymtab,
                            object::Archive::Kind Kind, bool Deterministic,
-                           bool Thin, std::optional<bool> IsEC) {
+                           bool Thin, std::optional<bool> IsEC,
+                           function_ref<void(Error)> Warn) {
   assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
 
   SmallString<0> SymNamesBuf;
@@ -1023,7 +1055,7 @@ Error writeArchiveToStream(raw_ostream &Out,
 
   Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
       StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab,
-      isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers, IsEC);
+      isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers, IsEC, Warn);
   if (Error E = DataOrErr.takeError())
     return E;
   std::vector<MemberData> &Data = *DataOrErr;
@@ -1266,11 +1298,15 @@ Error writeArchiveToStream(raw_ostream &Out,
   return Error::success();
 }
 
+void warnToStderr(Error Err) {
+  llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "warning: ");
+}
+
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
                    bool Deterministic, bool Thin,
                    std::unique_ptr<MemoryBuffer> OldArchiveBuf,
-                   std::optional<bool> IsEC) {
+                   std::optional<bool> IsEC, function_ref<void(Error)> Warn) {
   Expected<sys::fs::TempFile> Temp =
       sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a");
   if (!Temp)
@@ -1278,7 +1314,7 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
   raw_fd_ostream Out(Temp->FD, false);
 
   if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind,
-                                     Deterministic, Thin, IsEC)) {
+                                     Deterministic, Thin, IsEC, Warn)) {
     if (Error DiscardError = Temp->discard())
       return joinErrors(std::move(E), std::move(DiscardError));
     return E;
@@ -1302,12 +1338,14 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
 Expected<std::unique_ptr<MemoryBuffer>>
 writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers,
                      SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
-                     bool Deterministic, bool Thin) {
+                     bool Deterministic, bool Thin,
+                     function_ref<void(Error)> Warn) {
   SmallVector<char, 0> ArchiveBufferVector;
   raw_svector_ostream ArchiveStream(ArchiveBufferVector);
 
-  if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab,
-                                     Kind, Deterministic, Thin, std::nullopt))
+  if (Error E =
+          writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab, Kind,
+                               Deterministic, Thin, std::nullopt, Warn))
     return std::move(E);
 
   return std::make_unique<SmallVectorMemoryBuffer>(

diff  --git a/llvm/test/Object/archive-malformed-object.test b/llvm/test/Object/archive-malformed-object.test
index a92762975bda6..021df7ed873d1 100644
--- a/llvm/test/Object/archive-malformed-object.test
+++ b/llvm/test/Object/archive-malformed-object.test
@@ -1,27 +1,51 @@
 ## Show that the archive library emits error messages when adding malformed
-## objects.
+## object files and skips symbol tables for "malformed" bitcode files, which
+## are assumed to be bitcode files generated by compilers from the future.
 
 # RUN: rm -rf %t.dir
 # RUN: split-file %s %t.dir
 # RUN: cd %t.dir
 
-## Malformed bitcode object is the first file member of archive if the symbol table is required.
+## Create a malformed bitcode object.
 # RUN: llvm-as input.ll -o input.bc
 # RUN: cp input.bc good.bc
 # RUN: %python -c "with open('input.bc', 'a') as f: f.truncate(10)"
-# RUN: not llvm-ar rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
 
-## Malformed bitcode object is the last file member of archive if the symbol table is required.
+## Malformed bitcode objects either warn or error depending on the archive format
+## (see switch in getSymbolicFile). If the archive was created with a warning,
+## we want to check that the archive map is empty. llvm-nm will fail when it
+## tries to read the malformed bitcode file, but it's supposed to print the
+## archive map first, which in this case it won't because there won't be one.
 # RUN: rm -rf bad.a
-# RUN: not llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+# RUN: llvm-ar --format=bsd rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
+# RUN: not llvm-nm --print-armap bad.a | count 0
+# RUN: rm -rf bad.a
+# RUN: llvm-ar --format=gnu rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
+# RUN: not llvm-nm --print-armap bad.a | count 0
+# RUN: rm -rf bad.a
+# RUN: not llvm-ar --format=bigarchive rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+# RUN: rm -rf bad.a
+# RUN: not llvm-ar --format=coff rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+# RUN: rm -rf bad.a
+# RUN: not llvm-ar --format=darwin rc bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
+
+## Malformed bitcode object is the last file member of archive and
+## the symbol table is required. In this case we check that the
+## symbol table contains entries for the good object only.
+# RUN: rm -rf bad.a
+# RUN: llvm-ar rc bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=WARN1
+# RUN: not llvm-nm --print-armap bad.a | FileCheck %s --check-prefix=ARMAP
 
 ## Malformed bitcode object if the symbol table is not required for big archive.
+## For big archives we print an error instead of a warning because the AIX linker
+## presumably requires the index.
 # RUN: rm -rf bad.a
 # RUN: not llvm-ar --format=bigarchive rcS bad.a input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
 # RUN: rm -rf bad.a
 # RUN: not llvm-ar --format=bigarchive rcS bad.a good.bc input.bc 2>&1 | FileCheck %s --check-prefix=ERR1
 
 # ERR1: error: bad.a: 'input.bc': Invalid bitcode signature
+# WARN1: warning: 'input.bc': Invalid bitcode signature
 
 ## Non-bitcode malformed file.
 # RUN: yaml2obj input.yaml -o input.o
@@ -29,17 +53,23 @@
 
 # ERR2: error: bad.a: 'input.o': section header table goes past the end of the file: e_shoff = 0x9999
 
-## Don't emit an error if the symbol table is not required for formats other than the big archive format.
-# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc
+## Don't emit an error or warning if the symbol table is not required for formats other than the big archive format.
+# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc 2>&1 | count 0
 # RUN: llvm-ar t good.a | FileCheck %s --check-prefix=CONTENTS
 
 # CONTENTS:      input.o
 # CONTENTS-NEXT: input.bc
 
+# ARMAP: Archive map
+# ARMAP-NEXT: foo in good.bc
+# ARMAP-EMPTY:
+
 #--- input.ll
 target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-linux"
 
+ at foo = global i32 1
+
 #--- input.yaml
 --- !ELF
 FileHeader:


        


More information about the llvm-commits mailing list