[llvm] 2add7c5 - [llvm-ar][Object] Fix detection of need for 64-bit archive symbol tables

Andrew Ng via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 26 05:41:04 PDT 2020


Author: Andrew Ng
Date: 2020-10-26T12:29:28Z
New Revision: 2add7c5cf3ebbba629d2756b3e91728e55b40881

URL: https://github.com/llvm/llvm-project/commit/2add7c5cf3ebbba629d2756b3e91728e55b40881
DIFF: https://github.com/llvm/llvm-project/commit/2add7c5cf3ebbba629d2756b3e91728e55b40881.diff

LOG: [llvm-ar][Object] Fix detection of need for 64-bit archive symbol tables

The code to detect the requirement for 64-bit offsets in the archive
symbol table was not correctly accounting for the archive file signature
and the size of all the contents of the symbol table itself, e.g. the
symbol table's header and string table. Also was not considering the
variation in symbol table formats. This could result in the creation of
large archives with a corrupt symbol table.

Change the testing environment variable SYM64_THRESHOLD to be an
absolute value rather than a power of 2 in order to enable precise
testing of this detection code.

Differential Revision: https://reviews.llvm.org/D89891

Added: 
    

Modified: 
    llvm/lib/Object/ArchiveWriter.cpp
    llvm/test/Object/archive-symtab.test

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 08f1b85f2259..ce997464caa7 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -286,22 +286,12 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind,
     print<uint32_t>(Out, Kind, Val);
 }
 
-static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
-                             bool Deterministic, ArrayRef<MemberData> Members,
-                             StringRef StringTable) {
-  // We don't write a symbol table on an archive with no members -- except on
-  // Darwin, where the linker will abort unless the archive has a symbol table.
-  if (StringTable.empty() && !isDarwin(Kind))
-    return;
-
-  unsigned NumSyms = 0;
-  for (const MemberData &M : Members)
-    NumSyms += M.Symbols.size();
-
-  unsigned Size = 0;
-  unsigned OffsetSize = is64BitKind(Kind) ? sizeof(uint64_t) : sizeof(uint32_t);
-
-  Size += OffsetSize; // Number of entries
+static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
+                                       uint64_t NumSyms, uint64_t OffsetSize,
+                                       StringRef StringTable,
+                                       uint32_t *Padding = nullptr) {
+  assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize");
+  uint64_t Size = OffsetSize; // Number of entries
   if (isBSDLike(Kind))
     Size += NumSyms * OffsetSize * 2; // Table
   else
@@ -313,10 +303,15 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
   // least 4-byte aligned for 32-bit content.  Opt for the larger encoding
   // uniformly.
   // We do this for all bsd formats because it simplifies aligning members.
-  const Align Alignment(isBSDLike(Kind) ? 8 : 2);
-  unsigned Pad = offsetToAlignment(Size, Alignment);
+  uint32_t Pad = offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2));
   Size += Pad;
+  if (Padding)
+    *Padding = Pad;
+  return Size;
+}
 
+static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind,
+                                   bool Deterministic, uint64_t Size) {
   if (isBSDLike(Kind)) {
     const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF";
     printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0,
@@ -325,6 +320,24 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
     const char *Name = is64BitKind(Kind) ? "/SYM64" : "";
     printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size);
   }
+}
+
+static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
+                             bool Deterministic, ArrayRef<MemberData> Members,
+                             StringRef StringTable) {
+  // We don't write a symbol table on an archive with no members -- except on
+  // Darwin, where the linker will abort unless the archive has a symbol table.
+  if (StringTable.empty() && !isDarwin(Kind))
+    return;
+
+  unsigned NumSyms = 0;
+  for (const MemberData &M : Members)
+    NumSyms += M.Symbols.size();
+
+  uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4;
+  uint32_t Pad;
+  uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad);
+  writeSymbolTableHeader(Out, Kind, Deterministic, Size);
 
   uint64_t Pos = Out.tell() + Size;
 
@@ -579,17 +592,28 @@ static Error writeArchiveToStream(raw_ostream &Out,
 
   // We would like to detect if we need to switch to a 64-bit symbol table.
   if (WriteSymtab) {
-    uint64_t MaxOffset = 0;
+    uint64_t MaxOffset = 8; // For the file signature.
     uint64_t LastOffset = MaxOffset;
+    uint64_t NumSyms = 0;
     for (const auto &M : Data) {
       // Record the start of the member's offset
       LastOffset = MaxOffset;
       // Account for the size of each part associated with the member.
       MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size();
-      // We assume 32-bit symbols to see if 32-bit symbols are possible or not.
-      MaxOffset += M.Symbols.size() * 4;
+      NumSyms += M.Symbols.size();
     }
 
+    // We assume 32-bit offsets to see if 32-bit symbols are possible or not.
+    uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf);
+    auto computeSymbolTableHeaderSize =
+        [=] {
+          SmallString<0> TmpBuf;
+          raw_svector_ostream Tmp(TmpBuf);
+          writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize);
+          return TmpBuf.size();
+        };
+    LastOffset += computeSymbolTableHeaderSize() + SymtabSize;
+
     // The SYM64 format is used when an archive's member offsets are larger than
     // 32-bits can hold. The need for this shift in format is detected by
     // writeArchive. To test this we need to generate a file with a member that
@@ -597,15 +621,15 @@ static Error writeArchiveToStream(raw_ostream &Out,
     // speed the test up we use this environment variable to pretend like the
     // cutoff happens before 32-bits and instead happens at some much smaller
     // value.
+    uint64_t Sym64Threshold = 1ULL << 32;
     const char *Sym64Env = std::getenv("SYM64_THRESHOLD");
-    int Sym64Threshold = 32;
     if (Sym64Env)
       StringRef(Sym64Env).getAsInteger(10, Sym64Threshold);
 
     // If LastOffset isn't going to fit in a 32-bit varible we need to switch
     // to 64-bit. Note that the file can be larger than 4GB as long as the last
     // member starts before the 4GB offset.
-    if (LastOffset >= (1ULL << Sym64Threshold)) {
+    if (LastOffset >= Sym64Threshold) {
       if (Kind == object::Archive::K_DARWIN)
         Kind = object::Archive::K_DARWIN64;
       else

diff  --git a/llvm/test/Object/archive-symtab.test b/llvm/test/Object/archive-symtab.test
index d77244a0e7a8..31cd849d19df 100644
--- a/llvm/test/Object/archive-symtab.test
+++ b/llvm/test/Object/archive-symtab.test
@@ -51,9 +51,14 @@ Symbols:
 # RUN: llvm-nm -M %t.a | FileCheck %s
 
 # RUN: rm -f %t.a
-# RUN: env SYM64_THRESHOLD=1 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64
+# RUN: env SYM64_THRESHOLD=836 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64
 # RUN: llvm-nm -M %t.a | FileCheck %s
-# RUXX: grep SYM64 %t.a
+# RUN: grep '/SYM64/' %t.a
+
+# RUN: rm -f %t.a
+# RUN: env SYM64_THRESHOLD=837 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64
+# RUN: llvm-nm -M %t.a | FileCheck %s
+# RUN: not grep '/SYM64/' %t.a
 
 # CHECK: Archive map
 # CHECK-NEXT: main in {{.*}}.elf-x86-64
@@ -136,10 +141,15 @@ Symbols:
 # RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s
 
 # RUN: rm -f %t.a
-# RUN: env SYM64_THRESHOLD=1 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64
+# RUN: env SYM64_THRESHOLD=784 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64
 # RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s
 # RUN: grep '__\.SYMDEF_64' %t.a
 
+# RUN: rm -f %t.a
+# RUN: env SYM64_THRESHOLD=785 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64
+# RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s
+# RUN: not grep '__\.SYMDEF_64' %t.a
+
 # MACHO: Archive map
 # MACHO-NEXT: _main in trivial-object-test.macho-x86-64
 # MACHO-NEXT: _foo in trivial-object-test2.macho-x86-64


        


More information about the llvm-commits mailing list