[llvm] [llvm-ar][Object][COFF] Compute UseECMap in computeMemberData. (PR #85230)

Jacek Caban via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 14 07:43:29 PDT 2024


https://github.com/cjacek created https://github.com/llvm/llvm-project/pull/85230

This adds support for ARM64EC archives to llvm-ar. Compute UseECMap it instead of requiring caller to pass it to, which is inconvenient for llvm-ar.

Depends on #85229.

>From d8b69356c61a85571624e9912d53651e24d19337 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Fri, 23 Feb 2024 01:13:40 +0100
Subject: [PATCH 1/2] [Object][Archive][NFC] Create all symbolic files objects
 before calculating offsets.

This is refactoring preparing to move UseECMap computation to the archive writer.
We currently require writeArchive caller to pass that. This is not practical
for llvm-ar, which currently interprets at most one passed object. For a reliable
UseECMap, we need to interpret all symbolic objects: we may have, for example,
a list of x86_64 files followed by aarch64 file, which indicates that we should
use EC map for x86_64 objects.

This commit interprets symbolic files in a separated pass, which will be a convenient
place to implement UseECMap computation in the follow up. I think it also
makes accessing the next member for AIX big archive offset computation a bit easier.
---
 llvm/lib/Object/ArchiveWriter.cpp | 58 ++++++++++++-------------------
 1 file changed, 23 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index aa57e55de70c8d..97d0e4f75be8e0 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -795,23 +795,31 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
       Entry.second = Entry.second > 1 ? 1 : 0;
   }
 
+  std::vector<std::unique_ptr<SymbolicFile>> SymFiles;
+
+  if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) {
+    for (const NewArchiveMember &M : NewMembers) {
+      Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr =
+          getSymbolicFile(M.Buf->getMemBufferRef(), Context);
+      if (!SymFileOrErr)
+        return createFileError(M.MemberName, SymFileOrErr.takeError());
+      SymFiles.push_back(std::move(*SymFileOrErr));
+    }
+  }
+
   // The big archive format needs to know the offset of the previous member
   // header.
   uint64_t PrevOffset = 0;
   uint64_t NextMemHeadPadSize = 0;
-  std::unique_ptr<SymbolicFile> CurSymFile;
-  std::unique_ptr<SymbolicFile> NextSymFile;
-  uint16_t Index = 0;
 
-  for (auto M = NewMembers.begin(); M < NewMembers.end(); ++M) {
+  for (uint32_t Index = 0; Index < NewMembers.size(); ++Index) {
+    const NewArchiveMember *M = &NewMembers[Index];
     std::string Header;
     raw_string_ostream Out(Header);
 
     MemoryBufferRef Buf = M->Buf->getMemBufferRef();
     StringRef Data = Thin ? "" : Buf.getBuffer();
 
-    Index++;
-
     // ld64 expects the members to be 8-byte aligned for 64-bit content and at
     // least 4-byte aligned for 32-bit content.  Opt for the larger encoding
     // uniformly.  This matches the behaviour with cctools and ensures that ld64
@@ -837,29 +845,9 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
           std::move(StringMsg), object::object_error::parse_failed);
     }
 
-    if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) {
-      auto SetNextSymFile = [&NextSymFile,
-                             &Context](MemoryBufferRef Buf,
-                                       StringRef MemberName) -> Error {
-        Expected<std::unique_ptr<SymbolicFile>> SymFileOrErr =
-            getSymbolicFile(Buf, Context);
-        if (!SymFileOrErr)
-          return createFileError(MemberName, SymFileOrErr.takeError());
-        NextSymFile = std::move(*SymFileOrErr);
-        return Error::success();
-      };
-
-      if (M == NewMembers.begin())
-        if (Error Err = SetNextSymFile(Buf, M->MemberName))
-          return std::move(Err);
-
-      CurSymFile = std::move(NextSymFile);
-
-      if ((M + 1) != NewMembers.end())
-        if (Error Err = SetNextSymFile((M + 1)->Buf->getMemBufferRef(),
-                                       (M + 1)->MemberName))
-          return std::move(Err);
-    }
+    std::unique_ptr<SymbolicFile> CurSymFile;
+    if (!SymFiles.empty())
+      CurSymFile = std::move(SymFiles[Index]);
 
     // In the big archive file format, we need to calculate and include the next
     // member offset and previous member offset in the file member header.
@@ -880,13 +868,13 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
 
       // If there is another member file after this, we need to calculate the
       // padding before the header.
-      if ((M + 1) != NewMembers.end()) {
-        uint64_t OffsetToNextMemData = NextOffset +
-                                       sizeof(object::BigArMemHdrType) +
-                                       alignTo((M + 1)->MemberName.size(), 2);
+      if (Index + 1 != SymFiles.size()) {
+        uint64_t OffsetToNextMemData =
+            NextOffset + sizeof(object::BigArMemHdrType) +
+            alignTo(NewMembers[Index + 1].MemberName.size(), 2);
         NextMemHeadPadSize =
             alignToPowerOf2(OffsetToNextMemData,
-                            getMemberAlignment(NextSymFile.get())) -
+                            getMemberAlignment(SymFiles[Index + 1].get())) -
             OffsetToNextMemData;
         NextOffset += NextMemHeadPadSize;
       }
@@ -902,7 +890,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
     std::vector<unsigned> Symbols;
     if (NeedSymbols != SymtabWritingMode::NoSymtab) {
       Expected<std::vector<unsigned>> SymbolsOrErr =
-          getSymbols(CurSymFile.get(), Index, SymNames, SymMap);
+          getSymbols(CurSymFile.get(), Index + 1, SymNames, SymMap);
       if (!SymbolsOrErr)
         return createFileError(M->MemberName, SymbolsOrErr.takeError());
       Symbols = std::move(*SymbolsOrErr);

>From 6f151fb1e8d69dfa0a9ecb42c91fa775f3acf2ed Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Sun, 18 Feb 2024 23:59:07 +0100
Subject: [PATCH 2/2] [llvm-ar][Object][COFF] Compute UseECMap in
 computeMemberData.

Compute it instead of requiring caller to pass it to, which is inconvenient for llvm-ar.
---
 llvm/include/llvm/Object/ArchiveWriter.h    |  3 +-
 llvm/lib/Object/ArchiveWriter.cpp           | 35 +++++++--
 llvm/lib/Object/COFFImportFile.cpp          |  3 +-
 llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp |  8 +-
 llvm/test/tools/llvm-ar/ecsymbols.yaml      | 83 +++++++++++++++++++++
 5 files changed, 118 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/tools/llvm-ar/ecsymbols.yaml

diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h
index 7f915929cd7219..59252bf2bd5488 100644
--- a/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/llvm/include/llvm/Object/ArchiveWriter.h
@@ -51,8 +51,7 @@ enum class SymtabWritingMode {
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
                    bool Deterministic, bool Thin,
-                   std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr,
-                   bool IsEC = false);
+                   std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr);
 
 // writeArchiveToBuffer is similar to writeArchive but returns the Archive in a
 // buffer instead of writing it out to a file.
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 97d0e4f75be8e0..763a1e3c2f0abe 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -48,7 +48,7 @@ using namespace llvm;
 using namespace llvm::object;
 
 struct SymMap {
-  bool UseECMap;
+  bool UseECMap = false;
   std::map<std::string, uint16_t> Map;
   std::map<std::string, uint16_t> ECMap;
 };
@@ -678,6 +678,25 @@ static bool isECObject(object::SymbolicFile &Obj) {
   return false;
 }
 
+static bool useECMap(object::SymbolicFile &Obj) {
+  if (Obj.isCOFF())
+    return COFF::isAnyArm64(cast<COFFObjectFile>(&Obj)->getMachine());
+
+  if (Obj.isCOFFImportFile())
+    return COFF::isAnyArm64(cast<COFFImportFile>(&Obj)->getMachine());
+
+  if (Obj.isIR()) {
+    Expected<std::string> TripleStr =
+        getBitcodeTargetTriple(Obj.getMemoryBufferRef());
+    if (!TripleStr)
+      return false;
+    Triple T(*TripleStr);
+    return T.getArch() == Triple::aarch64;
+  }
+
+  return false;
+}
+
 bool isImportDescriptor(StringRef Name) {
   return Name.starts_with(ImportDescriptorPrefix) ||
          Name == StringRef{NullImportDescriptorSymbolName} ||
@@ -803,6 +822,11 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
           getSymbolicFile(M.Buf->getMemBufferRef(), Context);
       if (!SymFileOrErr)
         return createFileError(M.MemberName, SymFileOrErr.takeError());
+
+      // Use EC map if any member is ARM64 COFF file.
+      if (Kind == Archive::K_COFF && !SymMap->UseECMap && *SymFileOrErr)
+        SymMap->UseECMap = useECMap(**SymFileOrErr);
+
       SymFiles.push_back(std::move(*SymFileOrErr));
     }
   }
@@ -957,7 +981,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
                                   ArrayRef<NewArchiveMember> NewMembers,
                                   SymtabWritingMode WriteSymtab,
                                   object::Archive::Kind Kind,
-                                  bool Deterministic, bool Thin, bool IsEC) {
+                                  bool Deterministic, bool Thin) {
   assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
 
   SmallString<0> SymNamesBuf;
@@ -977,7 +1001,6 @@ static Error writeArchiveToStream(raw_ostream &Out,
   // reference to it, thus SymbolicFile should be destroyed first.
   LLVMContext Context;
 
-  SymMap.UseECMap = IsEC;
   Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
       StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab,
       isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers);
@@ -1226,7 +1249,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
                    bool Deterministic, bool Thin,
-                   std::unique_ptr<MemoryBuffer> OldArchiveBuf, bool IsEC) {
+                   std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
   Expected<sys::fs::TempFile> Temp =
       sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a");
   if (!Temp)
@@ -1234,7 +1257,7 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
   raw_fd_ostream Out(Temp->FD, false);
 
   if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind,
-                                     Deterministic, Thin, IsEC)) {
+                                     Deterministic, Thin)) {
     if (Error DiscardError = Temp->discard())
       return joinErrors(std::move(E), std::move(DiscardError));
     return E;
@@ -1263,7 +1286,7 @@ writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers,
   raw_svector_ostream ArchiveStream(ArchiveBufferVector);
 
   if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab,
-                                     Kind, Deterministic, Thin, false))
+                                     Kind, Deterministic, Thin))
     return std::move(E);
 
   return std::make_unique<SmallVectorMemoryBuffer>(
diff --git a/llvm/lib/Object/COFFImportFile.cpp b/llvm/lib/Object/COFFImportFile.cpp
index 46c8e702581eac..bf3611406f1d9d 100644
--- a/llvm/lib/Object/COFFImportFile.cpp
+++ b/llvm/lib/Object/COFFImportFile.cpp
@@ -711,8 +711,7 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path,
 
   return writeArchive(Path, Members, SymtabWritingMode::NormalSymtab,
                       object::Archive::K_COFF,
-                      /*Deterministic*/ true, /*Thin*/ false,
-                      /*OldArchiveBuf*/ nullptr, isArm64EC(Machine));
+                      /*Deterministic*/ true, /*Thin*/ false);
 }
 
 } // namespace object
diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index c3015d895230ea..64c89321331ba1 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -507,10 +507,10 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
   std::reverse(Members.begin(), Members.end());
 
   bool Thin = Args.hasArg(OPT_llvmlibthin);
-  if (Error E = writeArchive(
-          OutputPath, Members, SymtabWritingMode::NormalSymtab,
-          Thin ? object::Archive::K_GNU : object::Archive::K_COFF,
-          /*Deterministic=*/true, Thin, nullptr, COFF::isArm64EC(LibMachine))) {
+  if (Error E =
+          writeArchive(OutputPath, Members, SymtabWritingMode::NormalSymtab,
+                       Thin ? object::Archive::K_GNU : object::Archive::K_COFF,
+                       /*Deterministic=*/true, Thin)) {
     handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
       llvm::errs() << OutputPath << ": " << EI.message() << "\n";
     });
diff --git a/llvm/test/tools/llvm-ar/ecsymbols.yaml b/llvm/test/tools/llvm-ar/ecsymbols.yaml
new file mode 100644
index 00000000000000..1009ab278a978c
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/ecsymbols.yaml
@@ -0,0 +1,83 @@
+## Test that ECSYMBOLS section is created when ARM64EC is used.
+
+# RUN: yaml2obj %s -o %t.arm64ec.obj -DMACHINE=IMAGE_FILE_MACHINE_ARM64EC
+# RUN: yaml2obj %s -o %t.arm64.obj -DMACHINE=IMAGE_FILE_MACHINE_ARM64
+# RUN: yaml2obj %s -o %t.amd64.obj -DMACHINE=IMAGE_FILE_MACHINE_AMD64
+
+## Create ARM64EC archive
+# RUN: rm -f %t.a
+# RUN: llvm-ar crs %t.a %t.arm64ec.obj
+# RUN: llvm-nm --print-armap %t.a | FileCheck --check-prefixes=NOMAP,ECMAP %s
+
+## Add ARM64 object to the archive
+# RUN: llvm-ar rs %t.a %t.arm64.obj
+# RUN: llvm-nm --print-armap %t.a | FileCheck --check-prefixes=MAP,ECMAP %s
+
+## Create ARM64 archive
+# RUN: rm -f %t.a
+# RUN: llvm-ar crs %t.a %t.arm64.obj
+# RUN: llvm-nm --print-armap %t.a | FileCheck --check-prefixes=MAP,NOECMAP %s
+
+# Add ARM64EC object to the archive
+# RUN: llvm-ar rs %t.a %t.arm64ec.obj
+# RUN: llvm-nm --print-armap %t.a | FileCheck --check-prefixes=MAP,ECMAP %s
+
+## Create mixed archive
+# RUN: rm -f %t.a
+# RUN: llvm-ar crs %t.a %t.arm64ec.obj %t.arm64.obj
+# RUN: llvm-nm --print-armap %t.a | FileCheck --check-prefixes=MAP,ECMAP %s
+
+## Create mixed archive
+# RUN: rm -f %t.a
+# RUN: llvm-ar crs %t.a %t.amd64.obj %t.arm64.obj
+# RUN: llvm-nm --print-armap %t.a | FileCheck --check-prefixes=MAP,AMDECMAP %s
+
+# MAP: Archive map
+# MAP-NEXT: a in ecsymbols.yaml.tmp.arm64.obj
+# MAP-NEXT: b in ecsymbols.yaml.tmp.arm64.obj
+# MAP-NEXT: c in ecsymbols.yaml.tmp.arm64.obj
+# MAP-EMPTY:
+# NOMAP-NOT: Archive map
+
+# ECMAP: Archive EC map
+# ECMAP-NEXT: a in ecsymbols.yaml.tmp.arm64ec.obj
+# ECMAP-NEXT: b in ecsymbols.yaml.tmp.arm64ec.obj
+# ECMAP-NEXT: c in ecsymbols.yaml.tmp.arm64ec.obj
+# ECMAP-EMPTY:
+# NOECMAP-NOT: Archive EC map
+
+# AMDECMAP: Archive EC map
+# AMDECMAP-NEXT: a in ecsymbols.yaml.tmp.amd64.obj
+# AMDECMAP-NEXT: b in ecsymbols.yaml.tmp.amd64.obj
+# AMDECMAP-NEXT: c in ecsymbols.yaml.tmp.amd64.obj
+# ECMAP-EMPTY:
+
+--- !COFF
+header:
+  Machine:         [[MACHINE]]
+  Characteristics: [  ]
+sections:
+  - Name:            .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       4
+    SectionData:     ''
+symbols:
+  - Name:            b
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+  - Name:            c
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+  - Name:            a
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+...



More information about the llvm-commits mailing list