[llvm] [llvm-symbolizer] Recognize AIX big archive (PR #150401)

James Henderson via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 10 02:05:22 PDT 2025


================
@@ -557,57 +558,149 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
   if (!DbgObj)
     DbgObj = Obj;
   ObjectPair Res = std::make_pair(Obj, DbgObj);
-  std::string DbgObjPath = DbgObj->getFileName().str();
   auto Pair =
       ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
-  BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
-    ObjectPairForPathArch.erase(I);
-  });
+  std::string DbgObjPath = DbgObj->getFileName().str();
+  auto BinIter = BinaryForPath.find(DbgObjPath);
+  if (BinIter != BinaryForPath.end()) {
+    BinIter->second.pushEvictor(
+        [this, I = Pair.first]() { ObjectPairForPathArch.erase(I); });
+  }
   return Res;
 }
 
-Expected<ObjectFile *>
-LLVMSymbolizer::getOrCreateObject(const std::string &Path,
-                                  const std::string &ArchName) {
-  Binary *Bin;
+Expected<object::Binary *>
+LLVMSymbolizer::loadOrGetBinary(const std::string &Path) {
   auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
   if (!Pair.second) {
-    Bin = Pair.first->second->getBinary();
     recordAccess(Pair.first->second);
-  } else {
-    Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
-    if (!BinOrErr)
-      return BinOrErr.takeError();
+    return Pair.first->second->getBinary();
+  }
 
-    CachedBinary &CachedBin = Pair.first->second;
-    CachedBin = std::move(BinOrErr.get());
-    CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
-    LRUBinaries.push_back(CachedBin);
-    CacheSize += CachedBin.size();
-    Bin = CachedBin->getBinary();
+  Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
+  if (!BinOrErr) {
+    BinaryForPath.erase(Pair.first);
+    return BinOrErr.takeError();
   }
 
-  if (!Bin)
-    return static_cast<ObjectFile *>(nullptr);
+  CachedBinary &CachedBin = Pair.first->second;
+  CachedBin = std::move(*BinOrErr);
+  CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
+  LRUBinaries.push_back(CachedBin);
+  CacheSize += CachedBin.size();
+  return CachedBin->getBinary();
+}
 
-  if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
-    auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
-    if (I != ObjectForUBPathAndArch.end())
-      return I->second.get();
-
-    Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
-        UB->getMachOObjectForArch(ArchName);
-    if (!ObjOrErr) {
-      ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
-                                     std::unique_ptr<ObjectFile>());
-      return ObjOrErr.takeError();
+Expected<ObjectFile *> LLVMSymbolizer::findOrCacheObject(
+    const ArchiveCacheKey &Key,
+    llvm::function_ref<Expected<std::unique_ptr<ObjectFile>>()> Loader,
+    const std::string &PathForBinaryCache) {
+
+  auto It = ObjectFileCache.find(Key);
+  if (It != ObjectFileCache.end())
+    return It->second.get();
+
+  Expected<std::unique_ptr<ObjectFile>> ObjOrErr = Loader();
+  if (!ObjOrErr) {
+    ObjectFileCache.emplace(Key, std::unique_ptr<ObjectFile>());
+    return ObjOrErr.takeError();
+  }
+
+  ObjectFile *Res = ObjOrErr->get();
+  auto NewEntry = ObjectFileCache.emplace(Key, std::move(*ObjOrErr));
+  auto CacheIter = BinaryForPath.find(PathForBinaryCache);
+  if (CacheIter != BinaryForPath.end())
+    CacheIter->second.pushEvictor(
+        [this, Iter = NewEntry.first]() { ObjectFileCache.erase(Iter); });
+  return Res;
+}
+
+Expected<ObjectFile *> LLVMSymbolizer::getOrCreateObjectFromArchive(
+    StringRef ArchivePath, StringRef MemberName, StringRef ArchName) {
+  Expected<object::Binary *> BinOrErr = loadOrGetBinary(ArchivePath.str());
+  if (!BinOrErr)
+    return BinOrErr.takeError();
+  object::Binary *Bin = *BinOrErr;
+
+  object::Archive *Archive = dyn_cast_if_present<object::Archive>(Bin);
+  if (!Archive)
+    return createStringError(std::errc::invalid_argument,
+                             "'%s' is not a valid archive",
+                             ArchivePath.str().c_str());
+
+  Error Err = Error::success();
+  // On AIX, archives can contain multiple members with the same name but
+  // different types. We need to check all matches and find one that matches
+  // both name and architecture.
+  for (auto &Child : Archive->children(Err, /*SkipInternal=*/true)) {
+    Expected<StringRef> NameOrErr = Child.getName();
+    if (!NameOrErr)
+      continue;
+    if (*NameOrErr == sys::path::filename(MemberName)) {
+      Expected<std::unique_ptr<object::Binary>> MemberOrErr =
+          Child.getAsBinary();
+      if (!MemberOrErr)
+        continue;
+
+      std::unique_ptr<object::Binary> Binary = std::move(*MemberOrErr);
+      if (auto *Obj = dyn_cast<object::ObjectFile>(Binary.get())) {
+        Triple::ArchType ObjArch = Obj->makeTriple().getArch();
+        Triple RequestedTriple;
+        RequestedTriple.setArch(Triple::getArchTypeForLLVMName(ArchName));
+        if (ObjArch != RequestedTriple.getArch())
+          continue;
+
+        ArchiveCacheKey CacheKey{ArchivePath.str(), MemberName.str(),
+                                 ArchName.str()};
+        Expected<ObjectFile *> Res = findOrCacheObject(
+            CacheKey,
+            [O = std::unique_ptr<ObjectFile>(
+                 Obj)]() mutable -> Expected<std::unique_ptr<ObjectFile>> {
+              return std::move(O);
+            },
+            ArchivePath.str());
+        Binary.release();
+        return Res;
+      }
+    }
+  }
+  if (Err)
+    return std::move(Err);
+  return createStringError(std::errc::invalid_argument,
+                           "no matching member '%s' with arch '%s' in '%s'",
+                           MemberName.str().c_str(), ArchName.str().c_str(),
+                           ArchivePath.str().c_str());
+}
+
+Expected<ObjectFile *>
+LLVMSymbolizer::getOrCreateObject(const std::string &Path,
+                                  const std::string &ArchName) {
+  // First check for archive(member) format - more efficient to check closing
+  // paren first.
+  size_t CloseParen = Path.rfind(')');
+  if (CloseParen != std::string::npos && CloseParen == Path.length() - 1) {
----------------
jh7370 wrote:

How is a comparison like `if (!Path.empty())` any more complicated than `if (CloseParen != std::string::npos)`?

Big O notation doesn't really tell the full story - you need to consider the typical case. For all inputs without ')', the entire string has to be scanned with the current behaviour, which will require a minimum of two checks per character (one to check if it matches and one to see if the end has been reached). This will be the most common case, so should be the case we give greatest weight to. For the cases where ')' is present but not the last character, you still have to query at least 2 characters (the last character and the preceding character, if it is ')'), plus associated end checks, then do an additional check (i.e. is it the last character). Even if ')' is the last character, rfind will still have to do 2 checks (the "are we at the end check", then the character comparison), plus 1 additional "is it the last character check". So, to summarise, we have the following number of comparisons in the 3 cases:
1) No ')': 2 * <string length> + 1
2) ')' is present, but not last: 2 * <relative position of ')'> + 1
3) ')' is last: 3 (+ cost of looking up the '(')

My proposal has a maximum of 2 comparisons for ALL cases (not counting the additional rfind call needed if it the case we are interested in).

https://github.com/llvm/llvm-project/pull/150401


More information about the llvm-commits mailing list