[llvm] 85a2c50 - [llvm-nm] Print EC symbol map.

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 21 05:46:29 PDT 2023


Author: Jacek Caban
Date: 2023-04-21T15:46:19+03:00
New Revision: 85a2c50ec4979b87267e4f2068f2921beb0b98f2

URL: https://github.com/llvm/llvm-project/commit/85a2c50ec4979b87267e4f2068f2921beb0b98f2
DIFF: https://github.com/llvm/llvm-project/commit/85a2c50ec4979b87267e4f2068f2921beb0b98f2.diff

LOG: [llvm-nm] Print EC symbol map.

This is useful for examining ARM64EC static libraries and allows
better llvm-lib testing. The changes to the Archive class will also
be useful for LLD to support ARM64EC, where it will need to use one
map or the other, depending on the linking target (or both, in the
case of ARM64X, but separately, as they are in different namespaces).

Reviewed By: jhenderson, efriedma

Differential Revision: https://reviews.llvm.org/D146534

Added: 
    llvm/test/tools/llvm-nm/ecsymbols.test

Modified: 
    llvm/include/llvm/Object/Archive.h
    llvm/lib/Object/Archive.cpp
    llvm/test/tools/llvm-lib/ecsymbols.test
    llvm/tools/llvm-nm/llvm-nm.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h
index b77c8537eeaa0..7722f851183f2 100644
--- a/llvm/include/llvm/Object/Archive.h
+++ b/llvm/include/llvm/Object/Archive.h
@@ -302,6 +302,7 @@ class Archive : public Binary {
     StringRef getName() const;
     Expected<Child> getMember() const;
     Symbol getNext() const;
+    bool isECSymbol() const;
   };
 
   class symbol_iterator {
@@ -352,6 +353,8 @@ class Archive : public Binary {
     return make_range(symbol_begin(), symbol_end());
   }
 
+  Expected<iterator_range<symbol_iterator>> ec_symbols() const;
+
   static bool classof(Binary const *v) { return v->isArchive(); }
 
   // check if a symbol is in the archive
@@ -362,6 +365,7 @@ class Archive : public Binary {
   StringRef getSymbolTable() const { return SymbolTable; }
   StringRef getStringTable() const { return StringTable; }
   uint32_t getNumberOfSymbols() const;
+  uint32_t getNumberOfECSymbols() const;
   virtual uint64_t getFirstChildOffset() const { return getArchiveMagicLen(); }
 
   std::vector<std::unique_ptr<MemoryBuffer>> takeThinBuffers() {
@@ -377,6 +381,7 @@ class Archive : public Binary {
   void setFirstRegular(const Child &C);
 
   StringRef SymbolTable;
+  StringRef ECSymbolTable;
   StringRef StringTable;
 
 private:

diff  --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 52d0dafd07e4c..6ddbe3518d8e9 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -933,6 +933,34 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
     StringTable = BufOrErr.get();
     if (Increment())
       return;
+
+    if (I == E) {
+      setFirstRegular(*C);
+      Err = Error::success();
+      return;
+    }
+
+    NameOrErr = C->getRawName();
+    if (!NameOrErr) {
+      Err = NameOrErr.takeError();
+      return;
+    }
+    Name = NameOrErr.get();
+  }
+
+  if (Name == "/<ECSYMBOLS>/") {
+    // ARM64EC-aware libraries contain an additional special member with
+    // an EC symbol map after the string table. Its format is similar to a
+    // regular symbol map, except it doesn't contain member offsets. Its indexes
+    // refer to member offsets from the regular symbol table instead.
+    Expected<StringRef> BufOrErr = C->getBuffer();
+    if (!BufOrErr) {
+      Err = BufOrErr.takeError();
+      return;
+    }
+    ECSymbolTable = BufOrErr.get();
+    if (Increment())
+      return;
   }
 
   setFirstRegular(*C);
@@ -967,7 +995,17 @@ Archive::child_iterator Archive::child_end() const {
   return child_iterator::end(Child(nullptr, nullptr, nullptr));
 }
 
+bool Archive::Symbol::isECSymbol() const {
+  // Returns true if this symbol comes from the EC symbol map: such symbols use
+  // indexes SymbolCount..SymbolCount+getNumberOfECSymbols() (exclusive end).
+  uint32_t SymbolCount = Parent->getNumberOfSymbols();
+  return SymbolCount <= SymbolIndex &&
+         SymbolIndex < SymbolCount + Parent->getNumberOfECSymbols();
+}
+
 StringRef Archive::Symbol::getName() const {
+  if (isECSymbol())
+    return Parent->ECSymbolTable.begin() + StringIndex;
   return Parent->getSymbolTable().begin() + StringIndex;
 }
 
@@ -1006,15 +1044,24 @@ Expected<Archive::Child> Archive::Symbol::getMember() const {
     Buf += MemberCount * 4 + 4;
 
     uint32_t SymbolCount = read32le(Buf);
-    if (SymbolIndex >= SymbolCount)
+    uint16_t OffsetIndex;
+    if (SymbolIndex < SymbolCount) {
+      // Skip SymbolCount to get to the indices table.
+      const char *Indices = Buf + 4;
+
+      // Get the index of the offset in the file member offset table for this
+      // symbol.
+      OffsetIndex = read16le(Indices + SymbolIndex * 2);
+    } else if (isECSymbol()) {
+      // Skip SymbolCount to get to the indices table.
+      const char *Indices = Parent->ECSymbolTable.begin() + 4;
+
+      // Get the index of the offset in the file member offset table for this
+      // symbol.
+      OffsetIndex = read16le(Indices + (SymbolIndex - SymbolCount) * 2);
+    } else {
       return errorCodeToError(object_error::parse_failed);
-
-    // Skip SymbolCount to get to the indices table.
-    const char *Indices = Buf + 4;
-
-    // Get the index of the offset in the file member offset table for this
-    // symbol.
-    uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
+    }
     // Subtract 1 since OffsetIndex is 1 based.
     --OffsetIndex;
 
@@ -1063,6 +1110,9 @@ Archive::Symbol Archive::Symbol::getNext() const {
       t.StringIndex -= CurRanStrx;
       t.StringIndex += NextRanStrx;
     }
+  } else if (t.isECSymbol()) {
+    // Go to one past next null.
+    t.StringIndex = Parent->ECSymbolTable.find('\0', t.StringIndex) + 1;
   } else {
     // Go to one past next null.
     t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
@@ -1133,6 +1183,51 @@ Archive::symbol_iterator Archive::symbol_end() const {
   return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
 }
 
+Expected<iterator_range<Archive::symbol_iterator>> Archive::ec_symbols() const {
+  uint32_t Count = 0; // Stays 0 if ECSymbolTable is empty.
+
+  // Validate the EC symbol table before handing out iterators over it.
+  if (!ECSymbolTable.empty()) {
+    if (ECSymbolTable.size() < sizeof(uint32_t))
+      return malformedError("invalid EC symbols size (" +
+                            Twine(ECSymbolTable.size()) + ")");
+    if (SymbolTable.size() < sizeof(uint32_t))
+      return malformedError("invalid symbols size (" +
+                            Twine(SymbolTable.size()) + ")");
+
+    Count = read32le(ECSymbolTable.begin());
+    size_t StringIndex = sizeof(uint32_t) + Count * sizeof(uint16_t);
+    if (ECSymbolTable.size() < StringIndex)
+      return malformedError("invalid EC symbols size. Size was " +
+                            Twine(ECSymbolTable.size()) + ", but expected " +
+                            Twine(StringIndex));
+
+    uint32_t MemberCount = read32le(SymbolTable.begin());
+    const char *Indexes = ECSymbolTable.begin() + sizeof(uint32_t);
+    // Each symbol needs a nonzero index <= MemberCount and a terminated name.
+    for (uint32_t i = 0; i < Count; ++i) {
+      uint16_t Index = read16le(Indexes + i * sizeof(uint16_t));
+      if (!Index)
+        return malformedError("invalid EC symbol index 0");
+      if (Index > MemberCount)
+        return malformedError("invalid EC symbol index " + Twine(Index) +
+                              " is larger than member count " +
+                              Twine(MemberCount));
+
+      StringIndex = ECSymbolTable.find('\0', StringIndex);
+      if (StringIndex == StringRef::npos)
+        return malformedError("malformed EC symbol names: not null-terminated");
+      ++StringIndex;
+    }
+  }
+
+  uint32_t SymbolCount = getNumberOfSymbols(); // EC indexes start here.
+  return make_range(
+      symbol_iterator(Symbol(this, SymbolCount,
+                             sizeof(uint32_t) + Count * sizeof(uint16_t))),
+      symbol_iterator(Symbol(this, SymbolCount + Count, 0)));
+}
+
 uint32_t Archive::getNumberOfSymbols() const {
   if (!hasSymbolTable())
     return 0;
@@ -1151,6 +1246,12 @@ uint32_t Archive::getNumberOfSymbols() const {
   return read32le(buf);
 }
 
+uint32_t Archive::getNumberOfECSymbols() const {
+  if (ECSymbolTable.size() < sizeof(uint32_t))
+    return 0; // Table absent or too short to hold the 32-bit count.
+  return read32le(ECSymbolTable.begin()); // Little-endian count at offset 0.
+}
+
 Expected<std::optional<Archive::Child>> Archive::findSym(StringRef name) const {
   Archive::symbol_iterator bs = symbol_begin();
   Archive::symbol_iterator es = symbol_end();

diff  --git a/llvm/test/tools/llvm-lib/ecsymbols.test b/llvm/test/tools/llvm-lib/ecsymbols.test
index e38545ab22d6d..f04e2d3d63643 100644
--- a/llvm/test/tools/llvm-lib/ecsymbols.test
+++ b/llvm/test/tools/llvm-lib/ecsymbols.test
@@ -1,10 +1,9 @@
-# Check that llvm-lib can list the members of an archive which contains the
+# Check that llvm-lib /list does not print, as a member of an archive, the
 # special member /<ECSYMBOLS>/.
 
 # RUN: yaml2obj %s -o %t.lib
-# RUN: llvm-lib /list %t.lib | FileCheck %s
-
-# CHECK: /<ECSYMBOLS>/
+# RUN: llvm-lib /list %t.lib | FileCheck --check-prefix=NOEC --allow-empty %s
+# NOEC-NOT: ECSYMBOLS
 
 --- !Arch
 Members:
@@ -23,3 +22,12 @@ Members:
 # RUN: llvm-mc -triple=x86_64-pc-windows-msvc -filetype=obj -o x64-foo.o %S/Inputs/b.s
 # RUN: llvm-lib -machine:arm64ec -out:foo.lib arm64-foo.o arm64ec-foo.o x64-foo.o
 # RUN: grep -q '/<ECSYMBOLS>/' foo.lib
+
+# RUN: llvm-nm --print-armap foo.lib | FileCheck %s
+# CHECK: Archive map
+# CHECK-NEXT: a in arm64-foo.o
+# CHECK-EMPTY:
+# CHECK-NEXT: Archive EC map
+# CHECK-NEXT: a in arm64ec-foo.o
+# CHECK-NEXT: b in x64-foo.o
+# CHECK-EMPTY:

diff  --git a/llvm/test/tools/llvm-nm/ecsymbols.test b/llvm/test/tools/llvm-nm/ecsymbols.test
new file mode 100644
index 0000000000000..02e4db990650e
--- /dev/null
+++ b/llvm/test/tools/llvm-nm/ecsymbols.test
@@ -0,0 +1,90 @@
+# Check that llvm-nm --print-armap diagnoses archives whose special member
+# /<ECSYMBOLS>/ (the EC symbol map) is malformed.
+
+# RUN: yaml2obj --docnum=1 %s -o %t.lib
+# RUN: llvm-nm --print-armap %t.lib 2>&1 | FileCheck --check-prefix=NM1 %s
+# NM1: truncated or malformed archive (invalid EC symbols size (3))
+
+--- !Arch
+Members:
+  - Name:            '/'
+    Size:            '0'
+  - Name:            '/'
+    Size:            '0'
+  - Name:            '/<ECSYMBOLS>/'
+    Size:            '3'
+    Content:         010203
+    PaddingByte:     0
+...
+
+# RUN: yaml2obj --docnum=2 %s -o %t.lib
+# RUN: llvm-nm --print-armap %t.lib 2>&1 | FileCheck --check-prefix=NM2 %s
+# NM2: truncated or malformed archive (invalid EC symbols size. Size was 7, but expected 8)
+
+--- !Arch
+Members:
+  - Name:            '/'
+    Size:            '0'
+  - Name:            '/'
+    Size:            '12'
+    Content:         010000001000000000000000
+  - Name:            '/<ECSYMBOLS>/'
+    Size:            '7'
+    Content:         02000000010001
+    PaddingByte:     0
+...
+
+# RUN: yaml2obj --docnum=3 %s -o %t.lib
+# RUN: llvm-nm --print-armap %t.lib 2>&1 | FileCheck --check-prefix=NM3 %s
+# NM3: truncated or malformed archive (invalid EC symbol index 2 is larger than member count 1)
+
+--- !Arch
+Members:
+  - Name:            '/'
+    Size:            '0'
+  - Name:            '/'
+    Size:            '12'
+    Content:         010000001000000000000000
+  - Name:            '/<ECSYMBOLS>/'
+    Size:            '8'
+    Content:         0100000002006100
+  - Name:            'a.obj'
+    Size:            '0'
+...
+
+
+# RUN: yaml2obj --docnum=4 %s -o %t.lib
+# RUN: llvm-nm --print-armap %t.lib 2>&1 | FileCheck --check-prefix=NM4 %s
+# NM4: truncated or malformed archive (invalid EC symbol index 0)
+
+--- !Arch
+Members:
+  - Name:            '/'
+    Size:            '0'
+  - Name:            '/'
+    Size:            '12'
+    Content:         010000001000000000000000
+  - Name:            '/<ECSYMBOLS>/'
+    Size:            '8'
+    Content:         0100000000006100
+  - Name:            'a.obj'
+    Size:            '0'
+...
+
+# RUN: yaml2obj --docnum=5 %s -o %t.lib
+# RUN: llvm-nm --print-armap %t.lib 2>&1 | FileCheck --check-prefix=NM5 %s
+# NM5: truncated or malformed archive (malformed EC symbol names: not null-terminated)
+
+--- !Arch
+Members:
+  - Name:            '/'
+    Size:            '0'
+  - Name:            '/'
+    Size:            '12'
+    Content:         010000001000000000000000
+  - Name:            '/<ECSYMBOLS>/'
+    Size:            '8'
+    Content:         0100000001006161
+  - Name:            'a.obj'
+    Size:            '0'
+...

diff  --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index d5fba08bbe319..96baca5476ba6 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -1954,26 +1954,39 @@ static bool checkMachOAndArchFlags(SymbolicFile *O, StringRef Filename) {
   return true;
 }
 
+static void printArchiveMap(iterator_range<Archive::symbol_iterator> &map,
+                            StringRef Filename) {
+  for (auto I : map) { // One "<symbol> in <member>" line per map entry.
+    Expected<Archive::Child> C = I.getMember();
+    if (!C) {
+      error(C.takeError(), Filename);
+      break; // Stop printing at the first entry whose member lookup fails.
+    }
+    Expected<StringRef> FileNameOrErr = C->getName();
+    if (!FileNameOrErr) {
+      error(FileNameOrErr.takeError(), Filename);
+      break; // Likewise when the member's name cannot be read.
+    }
+    StringRef SymName = I.getName();
+    outs() << SymName << " in " << FileNameOrErr.get() << "\n";
+  }
+  // Terminate the printed map with an empty line.
+  outs() << "\n";
+}
+
 static void dumpArchiveMap(Archive *A, StringRef Filename) {
-  Archive::symbol_iterator I = A->symbol_begin();
-  Archive::symbol_iterator E = A->symbol_end();
-  if (I != E) {
+  auto Map = A->symbols();
+  if (!Map.empty()) {
     outs() << "Archive map\n";
-    for (; I != E; ++I) {
-      Expected<Archive::Child> C = I->getMember();
-      if (!C) {
-        error(C.takeError(), Filename);
-        break;
-      }
-      Expected<StringRef> FileNameOrErr = C->getName();
-      if (!FileNameOrErr) {
-        error(FileNameOrErr.takeError(), Filename);
-        break;
-      }
-      StringRef SymName = I->getName();
-      outs() << SymName << " in " << FileNameOrErr.get() << "\n";
-    }
-    outs() << "\n";
+    printArchiveMap(Map, Filename);
+  }
+
+  auto ECMap = A->ec_symbols();
+  if (!ECMap) {
+    warn(ECMap.takeError(), Filename);
+  } else if (!ECMap->empty()) {
+    outs() << "Archive EC map\n";
+    printArchiveMap(*ECMap, Filename);
   }
 }
 


        


More information about the llvm-commits mailing list