[llvm] big archive recognition by the llvm-symbolizer (PR #150401)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 24 03:50:05 PDT 2025


https://github.com/midhuncodes7 created https://github.com/llvm/llvm-project/pull/150401

This PR adds big archive recognition to llvm-symbolizer.
Archive members must be specified in the input in the form `archive.a(member.o)`.



>From d98ed0219d45b6e66fdab6773e2c4f77f6b0e3eb Mon Sep 17 00:00:00 2001
From: Midhunesh <midhunesh.p at ibm.com>
Date: Mon, 21 Jul 2025 14:24:50 +0530
Subject: [PATCH 1/4] big archive recognition implementation

---
 llvm/docs/CommandGuide/llvm-symbolizer.rst    |  18 ++-
 .../llvm/DebugInfo/Symbolize/Symbolize.h      |  26 ++++
 llvm/lib/DebugInfo/Symbolize/Symbolize.cpp    | 102 +++++++++++++--
 .../test/DebugInfo/Inputs/big-archive-32.yaml | 119 ++++++++++++++++++
 .../test/DebugInfo/Inputs/big-archive-64.yaml |  26 ++++
 .../DebugInfo/Inputs/big-archive-elf-1.yaml   |  68 ++++++++++
 .../DebugInfo/Inputs/big-archive-elf-2.yaml   |  68 ++++++++++
 .../DebugInfo/symbolize-big-archive-elf.test  |  24 ++++
 .../symbolize-big-archive-xcoff.test          |  26 ++++
 llvm/tools/llvm-symbolizer/Opts.td            |   6 +-
 10 files changed, 469 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/DebugInfo/Inputs/big-archive-32.yaml
 create mode 100644 llvm/test/DebugInfo/Inputs/big-archive-64.yaml
 create mode 100644 llvm/test/DebugInfo/Inputs/big-archive-elf-1.yaml
 create mode 100644 llvm/test/DebugInfo/Inputs/big-archive-elf-2.yaml
 create mode 100644 llvm/test/DebugInfo/symbolize-big-archive-elf.test
 create mode 100644 llvm/test/DebugInfo/symbolize-big-archive-xcoff.test

diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst
index 2da1b2470a83e..8f3a132139fe9 100644
--- a/llvm/docs/CommandGuide/llvm-symbolizer.rst
+++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst
@@ -535,16 +535,20 @@ MACH-O SPECIFIC OPTIONS
 .. option:: --default-arch <arch>
 
   If a binary contains object files for multiple architectures (e.g. it is a
-  Mach-O universal binary), symbolize the object file for a given architecture.
-  You can also specify the architecture by writing ``binary_name:arch_name`` in
-  the input (see example below). If the architecture is not specified in either
-  way, the address will not be symbolized. Defaults to empty string.
+  Mach-O universal binary or an AIX archive with architecture variants),
+  symbolize the object file for a given architecture. You can also specify
+  the architecture by writing ``binary_name:arch_name`` in the input (see
+  example below). For AIX archives, the format ``archive.a(member.o):arch``
+  is also supported. If the architecture is not specified in either way,
+  the address will not be symbolized. Defaults to empty string.
 
   .. code-block:: console
 
     $ cat addr.txt
     /tmp/mach_universal_binary:i386 0x1f84
     /tmp/mach_universal_binary:x86_64 0x100000f24
+    /tmp/archive.a(member.o):ppc 0x1000
+    /tmp/archive.a(member.o):ppc64 0x2000
 
     $ llvm-symbolizer < addr.txt
     _main
@@ -553,6 +557,12 @@ MACH-O SPECIFIC OPTIONS
     _main
     /tmp/source_x86_64.cc:8
 
+    _foo
+    /tmp/source_ppc.cc:12
+    
+    _foo
+    /tmp/source_ppc64.cc:12
+
 .. option:: --dsym-hint <path/to/file.dSYM>
 
   If the debug info for a binary isn't present in the default location, look for
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index fb8f3d8af6b1b..5144085f3e23c 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -29,6 +29,12 @@
 #include <utility>
 #include <vector>
 
+#if defined(_AIX)
+#  define SYMBOLIZE_AIX 1
+#else
+#  define SYMBOLIZE_AIX 0
+#endif
+
 namespace llvm {
 namespace object {
 class ELFObjectFileBase;
@@ -202,6 +208,12 @@ class LLVMSymbolizer {
   Expected<ObjectFile *> getOrCreateObject(const std::string &Path,
                                            const std::string &ArchName);
 
+  /// Return a pointer to object file at specified path, for a specified
+  /// architecture that is present inside an archive file
+  Expected<ObjectFile *> getOrCreateObjectFromArchive(StringRef ArchivePath,
+                                                      StringRef MemberName,
+                                                      const std::string &ArchName);   
+
   /// Update the LRU cache order when a binary is accessed.
   void recordAccess(CachedBinary &Bin);
 
@@ -226,6 +238,20 @@ class LLVMSymbolizer {
   std::map<std::pair<std::string, std::string>, std::unique_ptr<ObjectFile>>
       ObjectForUBPathAndArch;
 
+  struct ArchiveCacheKey {
+    std::string ArchivePath;  // Storage for StringRef
+    std::string MemberName;   // Storage for StringRef
+    std::string ArchName;     // Storage for StringRef
+
+    // Required for map comparison
+    bool operator<(const ArchiveCacheKey &Other) const {
+      return std::tie(ArchivePath, MemberName, ArchName) < 
+             std::tie(Other.ArchivePath, Other.MemberName, Other.ArchName);
+    }
+  };
+
+  std::map<ArchiveCacheKey, std::unique_ptr<ObjectFile>> ObjectForArchivePathAndArch;
+  
   Options Opts;
 
   std::unique_ptr<BuildIDFetcher> BIDFetcher;
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 56527719da51f..6dddc3a709239 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Object/Archive.h"
 #include <cassert>
 #include <cstring>
 
@@ -286,6 +287,7 @@ LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol,
 
 void LLVMSymbolizer::flush() {
   ObjectForUBPathAndArch.clear();
+  ObjectForArchivePathAndArch.clear();
   LRUBinaries.clear();
   CacheSize = 0;
   BinaryForPath.clear();
@@ -321,7 +323,7 @@ bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
 
 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
                              uint32_t &CRCHash) {
-  if (!Obj)
+  if (!Obj || !isa<ObjectFile>(Obj))
     return false;
   for (const SectionRef &Section : Obj->sections()) {
     StringRef Name;
@@ -557,19 +559,101 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
   if (!DbgObj)
     DbgObj = Obj;
   ObjectPair Res = std::make_pair(Obj, DbgObj);
-  std::string DbgObjPath = DbgObj->getFileName().str();
   auto Pair =
       ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
-  BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
+  std::string DbgObjPath = DbgObj->getFileName().str();
+  auto BinIter = BinaryForPath.find(DbgObjPath);
+  if (BinIter != BinaryForPath.end()) {
+    BinIter->second.pushEvictor([this, I = Pair.first]() {
     ObjectPairForPathArch.erase(I);
-  });
+    });
+  }
   return Res;
 }
 
+Expected<ObjectFile *> LLVMSymbolizer::getOrCreateObjectFromArchive(StringRef ArchivePath, 
+                                                                    StringRef MemberName, 
+                                                                    const std::string &ArchName) {
+  Binary *Bin = nullptr;
+  auto Pair = BinaryForPath.emplace(ArchivePath.str(), OwningBinary<Binary>());
+  if (!Pair.second) {
+    Bin = Pair.first->second->getBinary();
+    recordAccess(Pair.first->second);
+  } else {
+    Expected<OwningBinary<Binary>> ArchiveOrErr = createBinary(ArchivePath);
+    if (!ArchiveOrErr) {
+      return ArchiveOrErr.takeError();
+    }
+
+    CachedBinary &CachedBin = Pair.first->second;
+    CachedBin = std::move(ArchiveOrErr.get());
+    CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
+    LRUBinaries.push_back(CachedBin);
+    CacheSize += CachedBin.size();
+    Bin = CachedBin->getBinary();
+  }
+
+  if (!Bin || !isa<object::Archive>(Bin))
+    return errorCodeToError(object_error::invalid_file_type);
+
+  object::Archive *Archive = cast<object::Archive>(Bin);
+  Error Err = Error::success();
+  
+  // On AIX, archives can contain multiple members with same name but different types
+  // We need to check all matches and find one that matches both name and architecture
+  for (auto &Child : Archive->children(Err, /*SkipInternal=*/true)) {
+    Expected<StringRef> NameOrErr = Child.getName();
+    if (!NameOrErr)
+      continue; 
+    if (*NameOrErr == llvm::sys::path::filename(MemberName)) {
+      Expected<std::unique_ptr<object::Binary>> MemberOrErr = Child.getAsBinary();
+      if (!MemberOrErr)
+        continue; 
+      
+      std::unique_ptr<object::Binary> Binary = std::move(*MemberOrErr);
+      if (auto *Obj = dyn_cast<object::ObjectFile>(Binary.get())) {
+#if defined(_AIX)
+        Triple::ArchType ObjArch = Obj->makeTriple().getArch();
+        Triple RequestedTriple;
+        RequestedTriple.setArch(Triple::getArchTypeForLLVMName(ArchName));
+        if (ObjArch != RequestedTriple.getArch())
+          continue;
+#endif
+        ArchiveCacheKey CacheKey{ArchivePath.str(), MemberName.str(), ArchName};
+        auto I = ObjectForArchivePathAndArch.find(CacheKey);
+        if (I != ObjectForArchivePathAndArch.end())
+          return I->second.get();
+
+        auto CachedObj = std::unique_ptr<ObjectFile>(Obj);
+        auto NewEntry = ObjectForArchivePathAndArch.emplace(
+            CacheKey, std::move(CachedObj));
+        Binary.release();
+        BinaryForPath.find(ArchivePath.str())->second.pushEvictor(
+            [this, Iter = NewEntry.first]() { ObjectForArchivePathAndArch.erase(Iter); });
+        return NewEntry.first->second.get();
+      }
+    }
+  }
+  if (Err)
+    return std::move(Err);
+  return errorCodeToError(object_error::arch_not_found);
+}
+
 Expected<ObjectFile *>
 LLVMSymbolizer::getOrCreateObject(const std::string &Path,
                                   const std::string &ArchName) {
-  Binary *Bin;
+  // First check for archive(member) format - more efficient to check closing paren first
+  size_t CloseParen = Path.rfind(')');
+  if (CloseParen != std::string::npos && CloseParen == Path.length() - 1) {
+    size_t OpenParen = Path.rfind('(', CloseParen);
+    if (OpenParen != std::string::npos) {
+      StringRef ArchivePath = StringRef(Path).substr(0, OpenParen);
+      StringRef MemberName = StringRef(Path).substr(OpenParen + 1, CloseParen - OpenParen - 1);
+      return getOrCreateObjectFromArchive(ArchivePath, MemberName, ArchName);
+    }
+  }
+
+  Binary *Bin = nullptr;
   auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
   if (!Pair.second) {
     Bin = Pair.first->second->getBinary();
@@ -648,7 +732,9 @@ LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
 
   auto I = Modules.find(ModuleName);
   if (I != Modules.end()) {
-    recordAccess(BinaryForPath.find(BinaryName)->second);
+    auto BinIter = BinaryForPath.find(BinaryName);
+    if (BinIter != BinaryForPath.end())
+      recordAccess(BinIter->second);
     return I->second.get();
   }
 
@@ -716,7 +802,9 @@ LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
       createModuleInfo(Objects.first, std::move(Context), ModuleName);
   if (ModuleOrErr) {
     auto I = Modules.find(ModuleName);
-    BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() {
+    auto BinIter = BinaryForPath.find(BinaryName);
+    if (BinIter != BinaryForPath.end()) 
+      BinIter->second.pushEvictor([this, I]() {
       Modules.erase(I);
     });
   }
diff --git a/llvm/test/DebugInfo/Inputs/big-archive-32.yaml b/llvm/test/DebugInfo/Inputs/big-archive-32.yaml
new file mode 100644
index 0000000000000..2080607a1a88c
--- /dev/null
+++ b/llvm/test/DebugInfo/Inputs/big-archive-32.yaml
@@ -0,0 +1,119 @@
+--- !XCOFF
+FileHeader:
+  MagicNumber:     0x1DF
+  NumberOfSections: 2
+  CreationTime:    0
+  OffsetToSymbolTable: 0xA0
+  EntriesInSymbolTable: 11
+  AuxiliaryHeaderSize: 0
+  Flags:           0x0
+Sections:
+  - Name:            .text
+    Address:         0x0
+    Size:            0x1C
+    FileOffsetToData: 0x64
+    FileOffsetToRelocations: 0x0
+    FileOffsetToLineNumbers: 0x0
+    NumberOfRelocations: 0x0
+    NumberOfLineNumbers: 0x0
+    Flags:           [ STYP_TEXT ]
+    SectionData:     4E800020000000000009204000000001000000040003666F6F000000
+  - Name:            .data
+    Address:         0x1C
+    Size:            0xC
+    FileOffsetToData: 0x80
+    FileOffsetToRelocations: 0x8C
+    FileOffsetToLineNumbers: 0x0
+    NumberOfRelocations: 0x2
+    NumberOfLineNumbers: 0x0
+    Flags:           [ STYP_DATA ]
+    SectionData:     '000000000000002800000000'
+    Relocations:
+      - Address:         0x1C
+        Symbol:          0x5
+        Info:            0x1F
+        Type:            0x0
+      - Address:         0x20
+        Symbol:          0x9
+        Info:            0x1F
+        Type:            0x0
+Symbols:
+  - Name:            .file
+    Value:           0x0
+    Section:         N_DEBUG
+    Type:            0x18
+    StorageClass:    C_FILE
+    NumberOfAuxEntries: 2
+    AuxEntries:
+      - Type:            AUX_FILE
+        FileNameOrString: foo.c
+        FileStringType:  XFT_FN
+      - Type:            AUX_FILE
+        FileNameOrString: 'IBM Open XL C/C++ for AIX 17.1.3 (5725-C72, 5765-J18), version 17.1.3.0, LLVM version 21.0.0git (145c02cece3630765e6412e6820bc446ddb4e138)'
+        FileStringType:  XFT_CV
+  - Name:            ''
+    Value:           0x0
+    Section:         .text
+    Type:            0x0
+    StorageClass:    C_HIDEXT
+    NumberOfAuxEntries: 1
+    AuxEntries:
+      - Type:            AUX_CSECT
+        ParameterHashIndex: 0
+        TypeChkSectNum:  0
+        SymbolType:      XTY_SD
+        SymbolAlignment: 5
+        StorageMappingClass: XMC_PR
+        SectionOrLength: 25
+        StabInfoIndex:   0
+        StabSectNum:     0
+  - Name:            .foo
+    Value:           0x0
+    Section:         .text
+    Type:            0x0
+    StorageClass:    C_EXT
+    NumberOfAuxEntries: 1
+    AuxEntries:
+      - Type:            AUX_CSECT
+        ParameterHashIndex: 0
+        TypeChkSectNum:  0
+        SymbolType:      XTY_LD
+        SymbolAlignment: 0
+        StorageMappingClass: XMC_PR
+        SectionOrLength: 3
+        StabInfoIndex:   0
+        StabSectNum:     0
+  - Name:            foo
+    Value:           0x1C
+    Section:         .data
+    Type:            0x0
+    StorageClass:    C_EXT
+    NumberOfAuxEntries: 1
+    AuxEntries:
+      - Type:            AUX_CSECT
+        ParameterHashIndex: 0
+        TypeChkSectNum:  0
+        SymbolType:      XTY_SD
+        SymbolAlignment: 2
+        StorageMappingClass: XMC_DS
+        SectionOrLength: 12
+        StabInfoIndex:   0
+        StabSectNum:     0
+  - Name:            TOC
+    Value:           0x28
+    Section:         .data
+    Type:            0x0
+    StorageClass:    C_HIDEXT
+    NumberOfAuxEntries: 1
+    AuxEntries:
+      - Type:            AUX_CSECT
+        ParameterHashIndex: 0
+        TypeChkSectNum:  0
+        SymbolType:      XTY_SD
+        SymbolAlignment: 2
+        StorageMappingClass: XMC_TC0
+        SectionOrLength: 0
+        StabInfoIndex:   0
+        StabSectNum:     0
+StringTable:     {}
+...
diff --git a/llvm/test/DebugInfo/Inputs/big-archive-64.yaml b/llvm/test/DebugInfo/Inputs/big-archive-64.yaml
new file mode 100644
index 0000000000000..c1078efe2407e
--- /dev/null
+++ b/llvm/test/DebugInfo/Inputs/big-archive-64.yaml
@@ -0,0 +1,26 @@
+--- !XCOFF
+FileHeader:
+  Magic: 0x01F7        # XCOFF64 magic number
+  NumberOfSections: 1
+  TimeStamp: 0
+  SymbolTableOffset: 0
+  NumberOfSymTableEntries: 1
+OptionalHeader:
+  Flags: 0x0
+Sections:
+  - Name: .text
+    Address: 0x0`
+    Size: 1
+    FileOffsetToRawData: 0x100
+    FileOffsetToRelocations: 0
+    FileOffsetToLineNumbers: 0
+    NumberOfRelocations: 0
+    NumberOfLineNumbers: 0
+    Flags: STYP_TEXT
+    SectionData: '00'
+Symbols:
+  - Name: foo
+    Value: 0
+    SectionNumber: 1
+    StorageClass: C_EXT
+    NumberOfAuxEntries: 0
diff --git a/llvm/test/DebugInfo/Inputs/big-archive-elf-1.yaml b/llvm/test/DebugInfo/Inputs/big-archive-elf-1.yaml
new file mode 100644
index 0000000000000..8e5c929e82878
--- /dev/null
+++ b/llvm/test/DebugInfo/Inputs/big-archive-elf-1.yaml
@@ -0,0 +1,68 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_PPC64
+  Flags:           [  ]
+  SectionHeaderStringTable: .strtab
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    AddressAlign:    0x10
+    Content:         '2000804E000000000000000000000000'
+  - Name:            .comment
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         0049424D204F70656E20584C20432F432B2B20666F72204C696E7578206F6E20506F7765722031372E312E322028353732352D4337322C20353736352D4A3230292C2076657273696F6E2031372E312E322E302C20636C616E672076657273696F6E2032312E302E306769742028676974406769746875622E69626D2E636F6D3A636F6D70696C65722F6C6C766D2D70726F6A6563742E67697420653165653233663838333532623937333563363735386661396335653035313366626234393361322900
+  - Name:            .note.GNU-stack
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    AddressAlign:    0x8
+    Content:         1000000000000000017A5200047841011B0C01001000000018000000000000001000000000000000
+  - Name:            .rela.eh_frame
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .eh_frame
+    Relocations:
+      - Offset:          0x1C
+        Symbol:          .text
+        Type:            R_PPC64_REL32
+  - Name:            .llvm_addrsig
+    Type:            SHT_LLVM_ADDRSIG
+    Flags:           [ SHF_EXCLUDE ]
+    Link:            .symtab
+    AddressAlign:    0x1
+    Offset:          0x1B8
+    Symbols:         [  ]
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .strtab
+      - Name:            .text
+      - Name:            .comment
+      - Name:            .note.GNU-stack
+      - Name:            .eh_frame
+      - Name:            .rela.eh_frame
+      - Name:            .llvm_addrsig
+      - Name:            .symtab
+Symbols:
+  - Name:            foo1.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            .text
+    Type:            STT_SECTION
+    Section:         .text
+  - Name:            foo1
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Size:            0x10
+...
diff --git a/llvm/test/DebugInfo/Inputs/big-archive-elf-2.yaml b/llvm/test/DebugInfo/Inputs/big-archive-elf-2.yaml
new file mode 100644
index 0000000000000..0052db732500f
--- /dev/null
+++ b/llvm/test/DebugInfo/Inputs/big-archive-elf-2.yaml
@@ -0,0 +1,68 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_PPC64
+  Flags:           [  ]
+  SectionHeaderStringTable: .strtab
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    AddressAlign:    0x10
+    Content:         '2000804E000000000000000000000000'
+  - Name:            .comment
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         0049424D204F70656E20584C20432F432B2B20666F72204C696E7578206F6E20506F7765722031372E312E322028353732352D4337322C20353736352D4A3230292C2076657273696F6E2031372E312E322E302C20636C616E672076657273696F6E2032312E302E306769742028676974406769746875622E69626D2E636F6D3A636F6D70696C65722F6C6C766D2D70726F6A6563742E67697420653165653233663838333532623937333563363735386661396335653035313366626234393361322900
+  - Name:            .note.GNU-stack
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    AddressAlign:    0x8
+    Content:         1000000000000000017A5200047841011B0C01001000000018000000000000001000000000000000
+  - Name:            .rela.eh_frame
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .eh_frame
+    Relocations:
+      - Offset:          0x1C
+        Symbol:          .text
+        Type:            R_PPC64_REL32
+  - Name:            .llvm_addrsig
+    Type:            SHT_LLVM_ADDRSIG
+    Flags:           [ SHF_EXCLUDE ]
+    Link:            .symtab
+    AddressAlign:    0x1
+    Offset:          0x1B8
+    Symbols:         [  ]
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .strtab
+      - Name:            .text
+      - Name:            .comment
+      - Name:            .note.GNU-stack
+      - Name:            .eh_frame
+      - Name:            .rela.eh_frame
+      - Name:            .llvm_addrsig
+      - Name:            .symtab
+Symbols:
+  - Name:            foo2.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            .text
+    Type:            STT_SECTION
+    Section:         .text
+  - Name:            foo2
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Size:            0x10
+...
diff --git a/llvm/test/DebugInfo/symbolize-big-archive-elf.test b/llvm/test/DebugInfo/symbolize-big-archive-elf.test
new file mode 100644
index 0000000000000..2cd223333d546
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-big-archive-elf.test
@@ -0,0 +1,24 @@
+// Test archive member recognition by name (ELF format)
+
+// Generate object files from YAML
+// RUN: yaml2obj -o %t-1.o %S/Inputs/big-archive-elf-1.yaml
+// RUN: yaml2obj -o %t-2.o %S/Inputs/big-archive-elf-2.yaml
+
+// Create archive with differently named members
+// RUN: rm -f %t.a
+// RUN: llvm-ar crv %t.a %t-1.o %t-2.o
+
+// Verify archive contents
+// RUN: llvm-ar tv %t.a | FileCheck %s --check-prefix=CHECK-ARCHIVE
+// CHECK-ARCHIVE: {{.*}}-1.o
+// CHECK-ARCHIVE: {{.*}}-2.o
+
+// Test symbolization by member name (using just base names)
+// RUN: llvm-symbolizer --default-arch=ppc64 --obj="%t.a(%t-1.o)" 0x0 | FileCheck %s --check-prefix=CHECK-1
+// RUN: llvm-symbolizer --default-arch=ppc64 --obj="%t.a(%t-2.o)" 0x0 | FileCheck %s --check-prefix=CHECK-2
+// CHECK-1: foo1
+// CHECK-2: foo2
+
+// Test error cases
+// RUN: not llvm-symbolizer --obj="%t.a(nonexistent.o)" 0x1000 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+// CHECK-ERROR: error: {{.*}}No object file for requested architecture
diff --git a/llvm/test/DebugInfo/symbolize-big-archive-xcoff.test b/llvm/test/DebugInfo/symbolize-big-archive-xcoff.test
new file mode 100644
index 0000000000000..ae39fc929bea6
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-big-archive-xcoff.test
@@ -0,0 +1,26 @@
+// Test big archive recognition and error handling in llvm-symbolizer
+
+// Generate object files
+// RUN: yaml2obj -o %t-32.o %S/Inputs/big-archive-32.yaml
+// RUN: yaml2obj -o %t-64.o %S/Inputs/big-archive-64.yaml
+
+// Create archive with same-named members using different modes
+// RUN: rm -f %t.a
+// RUN: cp %t-32.o %t.o && llvm-ar -X32 crv %t.a %t.o
+// RUN: cp %t-64.o %t.o && llvm-ar -X64 qv %t.a %t.o
+ 
+// Verify archive contains two members with same name
+// RUN: llvm-ar tv -X32_64 %t.a | FileCheck %s --check-prefix=CHECK-ARCHIVE
+// CHECK-ARCHIVE: {{.*}}symbolize-big-archive-xcoff.test.tmp.o{{$}}
+// CHECK-ARCHIVE: {{.*}}symbolize-big-archive-xcoff.test.tmp.o{{$}}
+
+// Test successful symbolization
+// RUN: llvm-symbolizer --default-arch=ppc --obj="%t.a(%t.o)" 0x0 | FileCheck %s --check-prefix=CHECK-32
+// RUN: llvm-symbolizer --default-arch=ppc64 --obj="%t.a(%t.o)" 0x0 | FileCheck %s --check-prefix=CHECK-64
+// CHECK-32: foo
+// CHECK-64: foo
+
+// Test error cases
+// RUN: not llvm-symbolizer --obj="%t.a(nonexistent.o)" 0x1000 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+
+// CHECK-ERROR: error: {{.*}}No object file for requested architecture
diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td
index 10f1e6dbbddf7..f616109346c55 100644
--- a/llvm/tools/llvm-symbolizer/Opts.td
+++ b/llvm/tools/llvm-symbolizer/Opts.td
@@ -15,7 +15,7 @@ class F<string name, string help>: Flag<["--"], name>, HelpText<help>;
 
 def grp_mach_o : OptionGroup<"kind">,
                  HelpText<"llvm-symbolizer Mach-O Specific Options">;
-
+def grp_symbolizer : OptionGroup<"Symbolizer Options">;
 def grp_gsym : OptionGroup<"kind">,
                HelpText<"llvm-symbolizer GSYM Related Options">;
 
@@ -31,8 +31,8 @@ def color_EQ : Joined<["--"], "color=">, HelpText<"Whether to use color when sym
 defm debug_file_directory : Eq<"debug-file-directory", "Path to directory where to look for debug files">, MetaVarName<"<dir>">;
 defm debuginfod : B<"debuginfod", "Use debuginfod to find debug binaries", "Don't use debuginfod to find debug binaries">;
 defm default_arch
-    : Eq<"default-arch", "Default architecture (for multi-arch objects)">,
-      Group<grp_mach_o>;
+    : Eq<"default-arch", "Default architecture for multi-arch containers (Mach-O objects or AIX archives)">,
+      Group<grp_symbolizer>;
 defm demangle : B<"demangle", "Demangle function names", "Don't demangle function names">;
 def disable_gsym : F<"disable-gsym", "Don't consider using GSYM files for symbolication">, Group<grp_gsym>;
 def filter_markup : Flag<["--"], "filter-markup">, HelpText<"Filter symbolizer markup from stdin.">;

>From 64639e1512af8f31a9dfd909082f3c2a496bd914 Mon Sep 17 00:00:00 2001
From: Midhunesh <midhunesh.p at ibm.com>
Date: Tue, 22 Jul 2025 15:25:01 +0530
Subject: [PATCH 2/4] ELF test not supported on AIX

---
 llvm/test/DebugInfo/symbolize-big-archive-elf.test | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/test/DebugInfo/symbolize-big-archive-elf.test b/llvm/test/DebugInfo/symbolize-big-archive-elf.test
index 2cd223333d546..87442f633548d 100644
--- a/llvm/test/DebugInfo/symbolize-big-archive-elf.test
+++ b/llvm/test/DebugInfo/symbolize-big-archive-elf.test
@@ -1,4 +1,5 @@
 // Test archive member recognition by name (ELF format)
+// UNSUPPORTED: target={{.*}}-aix{{.*}}
 
 // Generate object files from YAML
 // RUN: yaml2obj -o %t-1.o %S/Inputs/big-archive-elf-1.yaml

>From f5a357ef3d638cc7d603783755f646d035853bc5 Mon Sep 17 00:00:00 2001
From: Midhunesh <midhuensh.p at ibm.com>
Date: Thu, 24 Jul 2025 05:07:26 -0400
Subject: [PATCH 3/4] update yaml script

---
 .../test/DebugInfo/Inputs/big-archive-64.yaml | 133 +++++++++++++++---
 1 file changed, 111 insertions(+), 22 deletions(-)

diff --git a/llvm/test/DebugInfo/Inputs/big-archive-64.yaml b/llvm/test/DebugInfo/Inputs/big-archive-64.yaml
index c1078efe2407e..9bbb1107555e0 100644
--- a/llvm/test/DebugInfo/Inputs/big-archive-64.yaml
+++ b/llvm/test/DebugInfo/Inputs/big-archive-64.yaml
@@ -1,26 +1,115 @@
 --- !XCOFF
 FileHeader:
-  Magic: 0x01F7        # XCOFF64 magic number
-  NumberOfSections: 1
-  TimeStamp: 0
-  SymbolTableOffset: 0
-  NumberOfSymTableEntries: 1
-OptionalHeader:
-  Flags: 0x0
+  MagicNumber:     0x1F7
+  NumberOfSections: 2
+  CreationTime:    0
+  OffsetToSymbolTable: 0xF8
+  EntriesInSymbolTable: 11
+  AuxiliaryHeaderSize: 0
+  Flags:           0x0
 Sections:
-  - Name: .text
-    Address: 0x0`
-    Size: 1
-    FileOffsetToRawData: 0x100
-    FileOffsetToRelocations: 0
-    FileOffsetToLineNumbers: 0
-    NumberOfRelocations: 0
-    NumberOfLineNumbers: 0
-    Flags: STYP_TEXT
-    SectionData: '00'
+  - Name:            .text
+    Address:         0x0
+    Size:            0x1C
+    FileOffsetToData: 0xA8
+    FileOffsetToRelocations: 0x0
+    FileOffsetToLineNumbers: 0x0
+    NumberOfRelocations: 0x0
+    NumberOfLineNumbers: 0x0
+    Flags:           [ STYP_TEXT ]
+    SectionData:     4E800020000000000009204000000001000000040003666F6F000000
+  - Name:            .data
+    Address:         0x20
+    Size:            0x18
+    FileOffsetToData: 0xC4
+    FileOffsetToRelocations: 0xDC
+    FileOffsetToLineNumbers: 0x0
+    NumberOfRelocations: 0x2
+    NumberOfLineNumbers: 0x0
+    Flags:           [ STYP_DATA ]
+    SectionData:     '000000000000000000000000000000380000000000000000'
+    Relocations:
+      - Address:         0x20
+        Symbol:          0x5
+        Info:            0x3F
+        Type:            0x0
+      - Address:         0x28
+        Symbol:          0x9
+        Info:            0x3F
+        Type:            0x0
 Symbols:
-  - Name: foo
-    Value: 0
-    SectionNumber: 1
-    StorageClass: C_EXT
-    NumberOfAuxEntries: 0
+  - Name:            .file
+    Value:           0x0
+    Section:         N_DEBUG
+    Type:            0x18
+    StorageClass:    C_FILE
+    NumberOfAuxEntries: 2
+    AuxEntries:
+      - Type:            AUX_FILE
+        FileNameOrString: foo.c
+        FileStringType:  XFT_FN
+      - Type:            AUX_FILE
+        FileNameOrString: 'IBM Open XL C/C++ for AIX 17.1.3 (5725-C72, 5765-J18), version 17.1.3.0, LLVM version 21.0.0git (5ca72bc8d2e87445649eab1825dffd2a047440ba)'
+        FileStringType:  XFT_CV
+  - Name:            ''
+    Value:           0x0
+    Section:         .text
+    Type:            0x0
+    StorageClass:    C_HIDEXT
+    NumberOfAuxEntries: 1
+    AuxEntries:
+      - Type:            AUX_CSECT
+        ParameterHashIndex: 0
+        TypeChkSectNum:  0
+        SymbolType:      XTY_SD
+        SymbolAlignment: 5
+        StorageMappingClass: XMC_PR
+        SectionOrLengthLo: 25
+        SectionOrLengthHi: 0
+  - Name:            .foo
+    Value:           0x0
+    Section:         .text
+    Type:            0x0
+    StorageClass:    C_EXT
+    NumberOfAuxEntries: 1
+    AuxEntries:
+      - Type:            AUX_CSECT
+        ParameterHashIndex: 0
+        TypeChkSectNum:  0
+        SymbolType:      XTY_LD
+        SymbolAlignment: 0
+        StorageMappingClass: XMC_PR
+        SectionOrLengthLo: 3
+        SectionOrLengthHi: 0
+  - Name:            foo
+    Value:           0x20
+    Section:         .data
+    Type:            0x0
+    StorageClass:    C_EXT
+    NumberOfAuxEntries: 1
+    AuxEntries:
+      - Type:            AUX_CSECT
+        ParameterHashIndex: 0
+        TypeChkSectNum:  0
+        SymbolType:      XTY_SD
+        SymbolAlignment: 3
+        StorageMappingClass: XMC_DS
+        SectionOrLengthLo: 24
+        SectionOrLengthHi: 0
+  - Name:            TOC
+    Value:           0x38
+    Section:         .data
+    Type:            0x0
+    StorageClass:    C_HIDEXT
+    NumberOfAuxEntries: 1
+    AuxEntries:
+      - Type:            AUX_CSECT
+        ParameterHashIndex: 0
+        TypeChkSectNum:  0
+        SymbolType:      XTY_SD
+        SymbolAlignment: 2
+        StorageMappingClass: XMC_TC0
+        SectionOrLengthLo: 0
+        SectionOrLengthHi: 0
+StringTable:     {}
+...

>From 305ef99231413fde40eac648a40217e6e69cf49a Mon Sep 17 00:00:00 2001
From: Midhunesh <midhunesh.p at ibm.com>
Date: Thu, 24 Jul 2025 16:00:48 +0530
Subject: [PATCH 4/4] target specific changes

---
 llvm/test/DebugInfo/symbolize-big-archive-xcoff.test | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/test/DebugInfo/symbolize-big-archive-xcoff.test b/llvm/test/DebugInfo/symbolize-big-archive-xcoff.test
index ae39fc929bea6..f034520638267 100644
--- a/llvm/test/DebugInfo/symbolize-big-archive-xcoff.test
+++ b/llvm/test/DebugInfo/symbolize-big-archive-xcoff.test
@@ -1,4 +1,5 @@
 // Test big archive recognition and error handling in llvm-symbolizer
+// REQUIRES: system-aix, target={{.*}}-aix{{.*}}
 
 // Generate object files
 // RUN: yaml2obj -o %t-32.o %S/Inputs/big-archive-32.yaml



More information about the llvm-commits mailing list