[llvm] [llvm-symbolizer] Recognize AIX big archive (PR #150401)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 8 01:47:10 PDT 2025


================
@@ -557,57 +558,149 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
   if (!DbgObj)
     DbgObj = Obj;
   ObjectPair Res = std::make_pair(Obj, DbgObj);
-  std::string DbgObjPath = DbgObj->getFileName().str();
   auto Pair =
       ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
-  BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
-    ObjectPairForPathArch.erase(I);
-  });
+  std::string DbgObjPath = DbgObj->getFileName().str();
+  auto BinIter = BinaryForPath.find(DbgObjPath);
+  if (BinIter != BinaryForPath.end()) {
+    BinIter->second.pushEvictor(
+        [this, I = Pair.first]() { ObjectPairForPathArch.erase(I); });
+  }
   return Res;
 }
 
-Expected<ObjectFile *>
-LLVMSymbolizer::getOrCreateObject(const std::string &Path,
-                                  const std::string &ArchName) {
-  Binary *Bin;
+Expected<object::Binary *>
+LLVMSymbolizer::loadOrGetBinary(const std::string &Path) {
   auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
   if (!Pair.second) {
-    Bin = Pair.first->second->getBinary();
     recordAccess(Pair.first->second);
-  } else {
-    Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
-    if (!BinOrErr)
-      return BinOrErr.takeError();
+    return Pair.first->second->getBinary();
+  }
 
-    CachedBinary &CachedBin = Pair.first->second;
-    CachedBin = std::move(BinOrErr.get());
-    CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
-    LRUBinaries.push_back(CachedBin);
-    CacheSize += CachedBin.size();
-    Bin = CachedBin->getBinary();
+  Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
+  if (!BinOrErr) {
+    BinaryForPath.erase(Pair.first);
+    return BinOrErr.takeError();
   }
 
-  if (!Bin)
-    return static_cast<ObjectFile *>(nullptr);
+  CachedBinary &CachedBin = Pair.first->second;
+  CachedBin = std::move(*BinOrErr);
+  CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
+  LRUBinaries.push_back(CachedBin);
+  CacheSize += CachedBin.size();
+  return CachedBin->getBinary();
+}
 
-  if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
-    auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
-    if (I != ObjectForUBPathAndArch.end())
-      return I->second.get();
-
-    Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
-        UB->getMachOObjectForArch(ArchName);
-    if (!ObjOrErr) {
-      ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
-                                     std::unique_ptr<ObjectFile>());
-      return ObjOrErr.takeError();
+Expected<ObjectFile *> LLVMSymbolizer::findOrCacheObject(
+    const ArchiveCacheKey &Key,
+    llvm::function_ref<Expected<std::unique_ptr<ObjectFile>>()> Loader,
+    const std::string &PathForBinaryCache) {
+
+  auto It = ObjectFileCache.find(Key);
+  if (It != ObjectFileCache.end())
+    return It->second.get();
+
+  Expected<std::unique_ptr<ObjectFile>> ObjOrErr = Loader();
+  if (!ObjOrErr) {
+    ObjectFileCache.emplace(Key, std::unique_ptr<ObjectFile>());
+    return ObjOrErr.takeError();
+  }
+
+  ObjectFile *Res = ObjOrErr->get();
+  auto NewEntry = ObjectFileCache.emplace(Key, std::move(*ObjOrErr));
+  auto CacheIter = BinaryForPath.find(PathForBinaryCache);
+  if (CacheIter != BinaryForPath.end())
+    CacheIter->second.pushEvictor(
+        [this, Iter = NewEntry.first]() { ObjectFileCache.erase(Iter); });
+  return Res;
+}
+
+Expected<ObjectFile *> LLVMSymbolizer::getOrCreateObjectFromArchive(
+    StringRef ArchivePath, StringRef MemberName, StringRef ArchName) {
+  Expected<object::Binary *> BinOrErr = loadOrGetBinary(ArchivePath.str());
+  if (!BinOrErr)
+    return BinOrErr.takeError();
+  object::Binary *Bin = *BinOrErr;
+
+  object::Archive *Archive = dyn_cast_if_present<object::Archive>(Bin);
+  if (!Archive)
+    return createStringError(std::errc::invalid_argument,
+                             "'%s' is not a valid archive",
+                             ArchivePath.str().c_str());
+
+  Error Err = Error::success();
+  // On AIX, archives can contain multiple members with the same name but
+  // different types. We need to check all matches and find one that matches
+  // both name and architecture.
+  for (auto &Child : Archive->children(Err, /*SkipInternal=*/true)) {
+    Expected<StringRef> NameOrErr = Child.getName();
+    if (!NameOrErr)
+      continue;
+    if (*NameOrErr == sys::path::filename(MemberName)) {
+      Expected<std::unique_ptr<object::Binary>> MemberOrErr =
+          Child.getAsBinary();
+      if (!MemberOrErr)
+        continue;
+
+      std::unique_ptr<object::Binary> Binary = std::move(*MemberOrErr);
+      if (auto *Obj = dyn_cast<object::ObjectFile>(Binary.get())) {
+        Triple::ArchType ObjArch = Obj->makeTriple().getArch();
+        Triple RequestedTriple;
+        RequestedTriple.setArch(Triple::getArchTypeForLLVMName(ArchName));
+        if (ObjArch != RequestedTriple.getArch())
+          continue;
+
+        ArchiveCacheKey CacheKey{ArchivePath.str(), MemberName.str(),
+                                 ArchName.str()};
+        Expected<ObjectFile *> Res = findOrCacheObject(
+            CacheKey,
+            [O = std::unique_ptr<ObjectFile>(
+                 Obj)]() mutable -> Expected<std::unique_ptr<ObjectFile>> {
+              return std::move(O);
+            },
+            ArchivePath.str());
+        Binary.release();
+        return Res;
+      }
+    }
+  }
+  if (Err)
+    return std::move(Err);
+  return createStringError(std::errc::invalid_argument,
+                           "no matching member '%s' with arch '%s' in '%s'",
+                           MemberName.str().c_str(), ArchName.str().c_str(),
+                           ArchivePath.str().c_str());
+}
+
+Expected<ObjectFile *>
+LLVMSymbolizer::getOrCreateObject(const std::string &Path,
+                                  const std::string &ArchName) {
+  // First check for archive(member) format - more efficient to check closing
+  // paren first.
+  size_t CloseParen = Path.rfind(')');
+  if (CloseParen != std::string::npos && CloseParen == Path.length() - 1) {
----------------
midhuncodes7 wrote:

Though the suggestion would efficiently check for ')', upon having an empty string it would need an explicit check for UB whereas current code returns npos safely.
For valid case, we still end up using rfind to get the matching '(' . Effectively we'd just reduce 2 * O(N) to 1 * O(N) and 1 * O(1).

The comment here mentioning about efficiency is not about code efficiency instead it is about checking for the closing paran first instead of opening paren or some other logic.

If you still insist on making this change, I'll go ahead and do it

https://github.com/llvm/llvm-project/pull/150401


More information about the llvm-commits mailing list