[llvm] big archive recognition by the llvm-symbolizer (PR #150401)

James Henderson via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 22 01:14:05 PDT 2025


================
@@ -557,57 +558,149 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
   if (!DbgObj)
     DbgObj = Obj;
   ObjectPair Res = std::make_pair(Obj, DbgObj);
-  std::string DbgObjPath = DbgObj->getFileName().str();
   auto Pair =
       ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
-  BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
-    ObjectPairForPathArch.erase(I);
-  });
+  std::string DbgObjPath = DbgObj->getFileName().str();
+  auto BinIter = BinaryForPath.find(DbgObjPath);
+  if (BinIter != BinaryForPath.end()) {
+    BinIter->second.pushEvictor(
+        [this, I = Pair.first]() { ObjectPairForPathArch.erase(I); });
+  }
   return Res;
 }
 
-Expected<ObjectFile *>
-LLVMSymbolizer::getOrCreateObject(const std::string &Path,
-                                  const std::string &ArchName) {
-  Binary *Bin;
+Expected<object::Binary *>
+LLVMSymbolizer::loadOrGetBinary(const std::string &Path) {
   auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
   if (!Pair.second) {
-    Bin = Pair.first->second->getBinary();
     recordAccess(Pair.first->second);
-  } else {
-    Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
-    if (!BinOrErr)
-      return BinOrErr.takeError();
+    return Pair.first->second->getBinary();
+  }
 
-    CachedBinary &CachedBin = Pair.first->second;
-    CachedBin = std::move(BinOrErr.get());
-    CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
-    LRUBinaries.push_back(CachedBin);
-    CacheSize += CachedBin.size();
-    Bin = CachedBin->getBinary();
+  Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
+  if (!BinOrErr) {
+    BinaryForPath.erase(Pair.first);
+    return BinOrErr.takeError();
   }
 
-  if (!Bin)
-    return static_cast<ObjectFile *>(nullptr);
+  CachedBinary &CachedBin = Pair.first->second;
+  CachedBin = std::move(*BinOrErr);
+  CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
+  LRUBinaries.push_back(CachedBin);
+  CacheSize += CachedBin.size();
+  return CachedBin->getBinary();
+}
 
-  if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
-    auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
-    if (I != ObjectForUBPathAndArch.end())
-      return I->second.get();
-
-    Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
-        UB->getMachOObjectForArch(ArchName);
-    if (!ObjOrErr) {
-      ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
-                                     std::unique_ptr<ObjectFile>());
-      return ObjOrErr.takeError();
+Expected<ObjectFile *> LLVMSymbolizer::findOrCacheObject(
+    const ArchiveCacheKey &Key,
+    llvm::function_ref<Expected<std::unique_ptr<ObjectFile>>()> Loader,
+    const std::string &PathForBinaryCache) {
+
+  auto It = ObjectFileCache.find(Key);
+  if (It != ObjectFileCache.end())
+    return It->second.get();
+
+  Expected<std::unique_ptr<ObjectFile>> ObjOrErr = Loader();
+  if (!ObjOrErr) {
+    ObjectFileCache.emplace(Key, std::unique_ptr<ObjectFile>());
+    return ObjOrErr.takeError();
+  }
+
+  ObjectFile *Res = ObjOrErr->get();
+  auto NewEntry = ObjectFileCache.emplace(Key, std::move(*ObjOrErr));
+  auto CacheIter = BinaryForPath.find(PathForBinaryCache);
+  if (CacheIter != BinaryForPath.end())
+    CacheIter->second.pushEvictor(
+        [this, Iter = NewEntry.first]() { ObjectFileCache.erase(Iter); });
+  return Res;
+}
+
+Expected<ObjectFile *> LLVMSymbolizer::getOrCreateObjectFromArchive(
+    StringRef ArchivePath, StringRef MemberName, StringRef ArchName) {
+  Expected<object::Binary *> BinOrErr = loadOrGetBinary(ArchivePath.str());
+  if (!BinOrErr)
+    return BinOrErr.takeError();
+  object::Binary *Bin = *BinOrErr;
+
+  object::Archive *Archive = dyn_cast_if_present<object::Archive>(Bin);
+  if (!Archive)
+    return createStringError(std::errc::invalid_argument,
+                             "'%s' is not a valid archive",
+                             ArchivePath.str().c_str());
+
+  Error Err = Error::success();
+  // On AIX, archives can contain multiple members with the same name but
+  // different types. We need to check all matches and find one that matches
+  // both name and architecture.
+  for (auto &Child : Archive->children(Err, /*SkipInternal=*/true)) {
+    Expected<StringRef> NameOrErr = Child.getName();
+    if (!NameOrErr)
+      continue;
+    if (*NameOrErr == sys::path::filename(MemberName)) {
+      Expected<std::unique_ptr<object::Binary>> MemberOrErr =
+          Child.getAsBinary();
+      if (!MemberOrErr)
+        continue;
+
+      std::unique_ptr<object::Binary> Binary = std::move(*MemberOrErr);
+      if (auto *Obj = dyn_cast<object::ObjectFile>(Binary.get())) {
+        Triple::ArchType ObjArch = Obj->makeTriple().getArch();
+        Triple RequestedTriple;
+        RequestedTriple.setArch(Triple::getArchTypeForLLVMName(ArchName));
+        if (ObjArch != RequestedTriple.getArch())
+          continue;
+
+        ArchiveCacheKey CacheKey{ArchivePath.str(), MemberName.str(),
+                                 ArchName.str()};
+        Expected<ObjectFile *> Res = findOrCacheObject(
+            CacheKey,
+            [O = std::unique_ptr<ObjectFile>(
+                 Obj)]() mutable -> Expected<std::unique_ptr<ObjectFile>> {
+              return std::move(O);
+            },
+            ArchivePath.str());
+        Binary.release();
+        return Res;
+      }
+    }
+  }
+  if (Err)
+    return std::move(Err);
+  return createStringError(std::errc::invalid_argument,
+                           "no matching member '%s' with arch '%s' in '%s'",
+                           MemberName.str().c_str(), ArchName.str().c_str(),
+                           ArchivePath.str().c_str());
+}
+
+Expected<ObjectFile *>
+LLVMSymbolizer::getOrCreateObject(const std::string &Path,
+                                  const std::string &ArchName) {
+  // First check for archive(member) format - more efficient to check closing
+  // paren first.
+  size_t CloseParen = Path.rfind(')');
+  if (CloseParen != std::string::npos && CloseParen == Path.length() - 1) {
+    size_t OpenParen = Path.rfind('(', CloseParen);
+    if (OpenParen != std::string::npos) {
+      StringRef ArchivePath = StringRef(Path).substr(0, OpenParen);
+      StringRef MemberName =
+          StringRef(Path).substr(OpenParen + 1, CloseParen - OpenParen - 1);
----------------
jh7370 wrote:

What happens with the case where you have a string like `foo.a()`?

https://github.com/llvm/llvm-project/pull/150401


More information about the llvm-commits mailing list