[llvm] 4fcbf38 - [llvm-lib] Use COFF archive format in llvm-lib (other archive tools don't use this format).

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 23 04:43:59 PDT 2023


Author: Jacek Caban
Date: 2023-03-23T13:43:21+02:00
New Revision: 4fcbf3842007569880fa916831efefda6b1bd032

URL: https://github.com/llvm/llvm-project/commit/4fcbf3842007569880fa916831efefda6b1bd032
DIFF: https://github.com/llvm/llvm-project/commit/4fcbf3842007569880fa916831efefda6b1bd032.diff

LOG: [llvm-lib] Use COFF archive format in llvm-lib (other archive tools don't use this format).

We currently just use GNU format for llvm-lib. This mostly works, but
ARM64EC needs an additional section that does not really fit GNU format.
This patch implements writing in COFF format (that is, the format that the
archive reader identifies as K_COFF). This mostly requires emitting the
symbol map. Note that, just as with MSVC, symbols are de-duplicated in
both the usual symbol table and the new symbol map.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D143540

Added: 
    

Modified: 
    llvm/lib/Object/ArchiveWriter.cpp
    llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
    llvm/test/tools/llvm-lib/duplicate.test

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index cd0429a27122..2d0f92e43a34 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -17,6 +17,7 @@
 #include "llvm/BinaryFormat/Magic.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/IRObjectFile.h"
 #include "llvm/Object/MachO.h"
@@ -43,6 +44,10 @@
 
 using namespace llvm;
 
+struct SymMap {
+  std::map<std::string, uint16_t> Map;
+};
+
 NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef)
     : Buf(MemoryBuffer::getMemBuffer(BufRef, false)),
       MemberName(BufRef.getBufferIdentifier()) {}
@@ -169,18 +174,21 @@ static bool isAIXBigArchive(object::Archive::Kind Kind) {
   return Kind == object::Archive::K_AIXBIG;
 }
 
+static bool isCOFFArchive(object::Archive::Kind Kind) {
+  return Kind == object::Archive::K_COFF;
+}
+
 static bool isBSDLike(object::Archive::Kind Kind) {
   switch (Kind) {
   case object::Archive::K_GNU:
   case object::Archive::K_GNU64:
   case object::Archive::K_AIXBIG:
+  case object::Archive::K_COFF:
     return false;
   case object::Archive::K_BSD:
   case object::Archive::K_DARWIN:
   case object::Archive::K_DARWIN64:
     return true;
-  case object::Archive::K_COFF:
-    break;
   }
   llvm_unreachable("not supported for writting");
 }
@@ -191,6 +199,10 @@ static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) {
                          isBSDLike(Kind) ? support::little : support::big);
 }
 
+template <class T> static void printLE(raw_ostream &Out, T Val) {
+  support::endian::write(Out, Val, support::little);
+}
+
 static void printRestOfMemberHeader(
     raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime,
     unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) {
@@ -295,7 +307,11 @@ printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable,
     auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)});
     if (Insertion.second) {
       Insertion.first->second = StringTable.tell();
-      StringTable << M.MemberName << "/\n";
+      StringTable << M.MemberName;
+      if (isCOFFArchive(Kind))
+        StringTable << '\0';
+      else
+        StringTable << "/\n";
     }
     NamePos = Insertion.first->second;
   }
@@ -376,6 +392,22 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
   uint32_t Pad = isAIXBigArchive(Kind)
                      ? 0
                      : offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2));
+
+  Size += Pad;
+  if (Padding)
+    *Padding = Pad;
+  return Size;
+}
+
+static uint64_t computeSymbolMapSize(uint64_t NumObj, SymMap &SymMap,
+                                     uint32_t *Padding = nullptr) {
+  uint64_t Size = sizeof(uint32_t) * 2; // Number of symbols and objects entries
+  Size += NumObj * sizeof(uint32_t);    // Offset table
+
+  for (auto S : SymMap.Map)
+    Size += sizeof(uint16_t) + S.first.length() + 1;
+
+  uint32_t Pad = offsetToAlignment(Size, Align(2));
   Size += Pad;
   if (Padding)
     *Padding = Pad;
@@ -399,8 +431,9 @@ static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind,
 }
 
 static uint64_t computeHeadersSize(object::Archive::Kind Kind,
+                                   uint64_t NumMembers,
                                    uint64_t StringMemberSize, uint64_t NumSyms,
-                                   uint64_t SymNamesSize) {
+                                   uint64_t SymNamesSize, SymMap *SymMap) {
   uint32_t OffsetSize = is64BitKind(Kind) ? 8 : 4;
   uint64_t SymtabSize =
       computeSymbolTableSize(Kind, NumSyms, OffsetSize, SymNamesSize);
@@ -410,8 +443,13 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind,
     writeSymbolTableHeader(Tmp, Kind, true, SymtabSize);
     return TmpBuf.size();
   };
+  uint32_t HeaderSize = computeSymbolTableHeaderSize();
+  uint64_t Size = strlen("!<arch>\n") + HeaderSize + SymtabSize;
+
+  if (SymMap)
+    Size += HeaderSize + computeSymbolMapSize(NumMembers, *SymMap);
 
-  return strlen("!<arch>\n") + computeSymbolTableHeaderSize() + SymtabSize + StringMemberSize;
+  return Size + StringMemberSize;
 }
 
 static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
@@ -420,7 +458,7 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
                              uint64_t PrevMemberOffset = 0) {
   // We don't write a symbol table on an archive with no members -- except on
   // Darwin, where the linker will abort unless the archive has a symbol table.
-  if (StringTable.empty() && !isDarwin(Kind))
+  if (StringTable.empty() && !isDarwin(Kind) && !isCOFFArchive(Kind))
     return;
 
   unsigned NumSyms = 0;
@@ -457,8 +495,35 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
     Out.write(uint8_t(0));
 }
 
+static void writeSymbolMap(raw_ostream &Out, object::Archive::Kind Kind,
+                           bool Deterministic, ArrayRef<MemberData> Members,
+                           SymMap &SymMap, uint64_t MembersOffset) {
+  uint32_t Pad;
+  uint64_t Size = computeSymbolMapSize(Members.size(), SymMap, &Pad);
+  writeSymbolTableHeader(Out, Kind, Deterministic, Size, 0);
+
+  uint32_t Pos = MembersOffset;
+
+  printLE<uint32_t>(Out, Members.size());
+  for (const MemberData &M : Members) {
+    printLE(Out, Pos); // member offset
+    Pos += M.Header.size() + M.Data.size() + M.Padding.size();
+  }
+
+  printLE<uint32_t>(Out, SymMap.Map.size());
+
+  for (auto S : SymMap.Map)
+    printLE(Out, S.second);
+  for (auto S : SymMap.Map)
+    Out << S.first << '\0';
+
+  while (Pad--)
+    Out.write(uint8_t(0));
+}
+
 static Expected<std::vector<unsigned>>
-getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
+getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames,
+           SymMap *SymMap, bool &HasObject) {
   std::vector<unsigned> Ret;
 
   // In the scenario when LLVMContext is populated SymbolicFile will contain a
@@ -487,10 +552,22 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
   for (const object::BasicSymbolRef &S : Obj->symbols()) {
     if (!isArchiveSymbol(S))
       continue;
-    Ret.push_back(SymNames.tell());
-    if (Error E = S.printName(SymNames))
-      return std::move(E);
-    SymNames << '\0';
+    if (SymMap) {
+      std::string Name;
+      raw_string_ostream NameStream(Name);
+      if (Error E = S.printName(NameStream))
+        return std::move(E);
+      if (SymMap->Map.find(Name) != SymMap->Map.end())
+        continue; // ignore duplicated symbol
+      SymMap->Map[Name] = Index;
+      Ret.push_back(SymNames.tell());
+      SymNames << Name << '\0';
+    } else {
+      Ret.push_back(SymNames.tell());
+      if (Error E = S.printName(SymNames))
+        return std::move(E);
+      SymNames << '\0';
+    }
   }
   return Ret;
 }
@@ -498,7 +575,8 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
 static Expected<std::vector<MemberData>>
 computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
                   object::Archive::Kind Kind, bool Thin, bool Deterministic,
-                  bool NeedSymbols, ArrayRef<NewArchiveMember> NewMembers) {
+                  bool NeedSymbols, SymMap *SymMap,
+                  ArrayRef<NewArchiveMember> NewMembers) {
   static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
 
   uint64_t Pos =
@@ -564,7 +642,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
 
   // The big archive format needs to know the offset of the previous member
   // header.
-  unsigned PrevOffset = 0;
+  unsigned PrevOffset = 0, Index = 0;
   for (const NewArchiveMember &M : NewMembers) {
     std::string Header;
     raw_string_ostream Out(Header);
@@ -572,6 +650,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
     MemoryBufferRef Buf = M.Buf->getMemBufferRef();
     StringRef Data = Thin ? "" : Buf.getBuffer();
 
+    Index++;
+
     // ld64 expects the members to be 8-byte aligned for 64-bit content and at
     // least 4-byte aligned for 32-bit content.  Opt for the larger encoding
     // uniformly.  This matches the behaviour with cctools and ensures that ld64
@@ -612,7 +692,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
     std::vector<unsigned> Symbols;
     if (NeedSymbols) {
       Expected<std::vector<unsigned>> SymbolsOrErr =
-          getSymbols(Buf, SymNames, HasObject);
+          getSymbols(Buf, Index, SymNames, SymMap, HasObject);
       if (!SymbolsOrErr)
         return createFileError(M.MemberName, SymbolsOrErr.takeError());
       Symbols = std::move(*SymbolsOrErr);
@@ -624,7 +704,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
   // If there are no symbols, emit an empty symbol table, to satisfy Solaris
   // tools, older versions of which expect a symbol table in a non-empty
   // archive, regardless of whether there are any symbols in it.
-  if (HasObject && SymNames.tell() == 0)
+  if (HasObject && SymNames.tell() == 0 && !isCOFFArchive(Kind))
     SymNames << '\0' << '\0' << '\0';
   return Ret;
 }
@@ -682,10 +762,16 @@ static Error writeArchiveToStream(raw_ostream &Out,
   raw_svector_ostream SymNames(SymNamesBuf);
   SmallString<0> StringTableBuf;
   raw_svector_ostream StringTable(StringTableBuf);
+  SymMap SymMap;
 
-  Expected<std::vector<MemberData>> DataOrErr =
-      computeMemberData(StringTable, SymNames, Kind, Thin, Deterministic,
-                        WriteSymtab, NewMembers);
+  // COFF symbol map uses 16-bit indexes, so we can't use it if there are too
+  // many members.
+  if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe)
+    Kind = object::Archive::K_GNU;
+
+  Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
+      StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab,
+      isCOFFArchive(Kind) ? &SymMap : nullptr, NewMembers);
   if (Error E = DataOrErr.takeError())
     return E;
   std::vector<MemberData> &Data = *DataOrErr;
@@ -717,8 +803,9 @@ static Error writeArchiveToStream(raw_ostream &Out,
   // table is at the start of the archive file for other archive formats.
   if (WriteSymtab && !is64BitKind(Kind)) {
     // We assume 32-bit offsets to see if 32-bit symbols are possible or not.
-    HeadersSize =
-        computeHeadersSize(Kind, StringTableSize, NumSyms, SymNamesBuf.size());
+    HeadersSize = computeHeadersSize(Kind, Data.size(), StringTableSize,
+                                     NumSyms, SymNamesBuf.size(),
+                                     isCOFFArchive(Kind) ? &SymMap : nullptr);
 
     // The SYM64 format is used when an archive's member offsets are larger than
     // 32-bits can hold. The need for this shift in format is detected by
@@ -754,10 +841,14 @@ static Error writeArchiveToStream(raw_ostream &Out,
   if (!isAIXBigArchive(Kind)) {
     if (WriteSymtab) {
       if (!HeadersSize)
-        HeadersSize = computeHeadersSize(Kind, StringTableSize, NumSyms,
-                                         SymNamesBuf.size());
+        HeadersSize = computeHeadersSize(
+            Kind, Data.size(), StringTableSize, NumSyms, SymNamesBuf.size(),
+            isCOFFArchive(Kind) ? &SymMap : nullptr);
       writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf,
                        *HeadersSize);
+
+      if (isCOFFArchive(Kind))
+        writeSymbolMap(Out, Kind, Deterministic, Data, SymMap, *HeadersSize);
     }
 
     if (StringTableSize)

diff  --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 3a609eefcb10..9ca63bead9bc 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -455,10 +455,12 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
   // For compatibility with MSVC, reverse member vector after de-duplication.
   std::reverse(Members.begin(), Members.end());
 
+  bool Thin = Args.hasArg(OPT_llvmlibthin);
   if (Error E =
           writeArchive(OutputPath, Members,
-                       /*WriteSymtab=*/true, object::Archive::K_GNU,
-                       /*Deterministic*/ true, Args.hasArg(OPT_llvmlibthin))) {
+                       /*WriteSymtab=*/true,
+                       Thin ? object::Archive::K_GNU : object::Archive::K_COFF,
+                       /*Deterministic*/ true, Thin)) {
     handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
       llvm::errs() << OutputPath << ": " << EI.message() << "\n";
     });

diff  --git a/llvm/test/tools/llvm-lib/duplicate.test b/llvm/test/tools/llvm-lib/duplicate.test
index 098858d4fbcd..87dae66cb80b 100644
--- a/llvm/test/tools/llvm-lib/duplicate.test
+++ b/llvm/test/tools/llvm-lib/duplicate.test
@@ -14,3 +14,12 @@ CHECK: bar.o
 CHECK-NEXT: abc.o
 CHECK-NEXT: foo.o
 CHECK-NOT: foo.o
+
+# Check that symbol map contains sorted, de-duplicated symbols.
+RUN: cd %t && llvm-lib -out:foo.lib foo.o foo.o abc.o bar.o  foo.o foo.o
+RUN: llvm-nm --print-armap %t/foo.lib | FileCheck %s --check-prefix=DUP
+# DUP: Archive map
+# DUP-NEXT: a in abc.o
+# DUP-NEXT: b in bar.o
+# DUP-NEXT: c in abc.o
+# DUP-EMPTY


        


More information about the llvm-commits mailing list