[llvm] [llvm-dlltool] Implement the --identify option (PR #127465)

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 17 02:06:47 PST 2025


================
@@ -158,6 +159,169 @@ bool parseModuleDefinition(StringRef DefFileName, MachineTypes Machine,
   return true;
 }
 
+int printError(llvm::Error E, Twine File) {
+  if (!E)
+    return 0;
+  handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EIB) {
+    llvm::errs() << "error opening " << File << ": " << EIB.message() << "\n";
+  });
+  return 1;
+}
+
+template <typename Callable>
+int forEachCoff(object::Archive &Archive, StringRef Name, Callable Callback) {
+  Error Err = Error::success();
+  for (auto &C : Archive.children(Err)) {
+    Expected<StringRef> NameOrErr = C.getName();
+    if (!NameOrErr)
+      return printError(NameOrErr.takeError(), Name);
+    StringRef Name = *NameOrErr;
+
+    Expected<MemoryBufferRef> ChildMB = C.getMemoryBufferRef();
+    if (!ChildMB)
+      return printError(ChildMB.takeError(), Name);
+
+    if (identify_magic(ChildMB->getBuffer()) == file_magic::coff_object) {
+      auto Obj = object::COFFObjectFile::create(*ChildMB);
+      if (!Obj)
+        return printError(Obj.takeError(), Name);
+      if (!Callback(*Obj->get(), Name))
+        return 1;
+    }
+  }
+  if (Err)
+    return printError(std::move(Err), Name);
+  return 0;
+}
+
+// To find the named of the imported DLL from an import library, we can either
+// inspect the object files that form the import table entries, or we could
+// just look at the archive member names, for MSVC style import libraries.
+// Looking at the archive member names doesn't work for GNU style import
+// libraries though, while inspecting the import table entries works for
+// both. (MSVC style import libraries contain a couple regular object files
+// for the header/trailers.)
+//
+// This implementation does the same as GNU dlltool does; look at the
+// content of ".idata$7" sections, or for MSVC style libraries, look
+// at ".idata$6$" sections.
+//
+// For GNU style import libraries, there are also other data chunks in sections
+// named ".idata$7" (entries to the IAT or ILT); these are distinguished
+// by seeing that they contain relocations. (They also look like an empty
+// string when looking for null termination.)
+//
+// Alternatively, we could do things differently - look for any .idata$2
+// section; this would be import directory entries. At offset 0xc in them
+// there is the RVA of the import DLL name; look for a relocation at this
+// spot and locate the symbol that it points at. That symbol may either
+// be within the same object file (in the case of MSVC style import libraries)
+// or another object file (in the case of GNU import libraries).
+bool identifyImportName(const COFFObjectFile &Obj, StringRef ObjName,
+                        std::vector<StringRef> &Names, bool IsMsStyleImplib) {
+  StringRef TargetName = IsMsStyleImplib ? ".idata$6" : ".idata$7";
+  for (const auto &S : Obj.sections()) {
+    Expected<StringRef> NameOrErr = S.getName();
+    if (!NameOrErr) {
+      printError(NameOrErr.takeError(), ObjName);
+      return false;
+    }
+    StringRef Name = *NameOrErr;
+    if (Name != TargetName)
+      continue;
+
+    // GNU import libraries contain .idata$7 section in the per function
+    // objects too, but they contain relocations.
+    if (!IsMsStyleImplib && !S.relocations().empty())
+      continue;
+
+    Expected<StringRef> ContentsOrErr = S.getContents();
+    if (!ContentsOrErr) {
+      printError(ContentsOrErr.takeError(), ObjName);
+      return false;
+    }
+    StringRef Contents = *ContentsOrErr;
+    Contents = Contents.substr(0, Contents.find('\0'));
+    if (Contents.empty())
+      continue;
+    Names.push_back(Contents);
+    return true;
+  }
+  return true;
+}
+
+bool objContainsSymbol(const COFFObjectFile &Obj, StringRef ObjName,
+                       StringRef SymbolName, bool &Contains) {
+  Contains = false;
+  for (const auto &S : Obj.symbols()) {
+    Expected<StringRef> NameOrErr = S.getName();
+    if (!NameOrErr) {
+      printError(NameOrErr.takeError(), ObjName);
+      return false;
+    }
+    StringRef Name = *NameOrErr;
+    if (Name == SymbolName) {
+      Contains = true;
+      return true;
+    }
+  }
+  return true;
+}
+
+int doIdentify(StringRef File, bool IdentifyStrict) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MaybeBuf = MemoryBuffer::getFile(
+      File, /*IsText=*/false, /*RequiredNullTerminator=*/false);
+  if (!MaybeBuf)
+    return printError(errorCodeToError(MaybeBuf.getError()), File);
+  if (identify_magic(MaybeBuf.get()->getBuffer()) != file_magic::archive) {
+    llvm::errs() << File << " is not a library\n";
+    return 1;
+  }
+
+  std::unique_ptr<MemoryBuffer> B = std::move(MaybeBuf.get());
+  Error Err = Error::success();
+  object::Archive Archive(B->getMemBufferRef(), Err);
+  if (Err)
+    return printError(std::move(Err), B->getBufferIdentifier());
+
+  bool IsMsStyleImplib = false;
+  if (forEachCoff(Archive, B->getBufferIdentifier(),
+                  [&](const COFFObjectFile &Obj, StringRef ObjName) -> bool {
+                    if (IsMsStyleImplib)
+                      return true;
+                    bool Contains;
+                    if (!objContainsSymbol(
+                            Obj, ObjName, "__NULL_IMPORT_DESCRIPTOR", Contains))
+                      return false;
----------------
mstorsjo wrote:

A lot of the error handling here feels a bit clumsy, feel free to suggest ways to make it more idiomatic or concise. (I've strived to always return things back to the caller, rather than calling `exit()` within the functions on error, to make the code more suitable for moving into a library if relevant.)

https://github.com/llvm/llvm-project/pull/127465


More information about the llvm-commits mailing list