[llvm] 1099208 - [symbolizer] Check existence of input file in GNU mode

Serge Pavlov via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 23 03:21:14 PDT 2023


Author: Serge Pavlov
Date: 2023-06-23T17:20:15+07:00
New Revision: 1099208b991ee2ff2d0c0667fdd668702523a4cc

URL: https://github.com/llvm/llvm-project/commit/1099208b991ee2ff2d0c0667fdd668702523a4cc
DIFF: https://github.com/llvm/llvm-project/commit/1099208b991ee2ff2d0c0667fdd668702523a4cc.diff

LOG: [symbolizer] Check existence of input file in GNU mode

GNU addr2line exits immediately if it cannot open the file specified as
executable/relocatable. In contrast, llvm-addr2line does not exit and, if no
addresses are specified on the command line, waits for input on stdin. This
causes the test compiler-rt/test/asan/TestCases/Posix/asan-symbolize-bad-path.cc to block
forever on Gentoo (see https://reviews.llvm.org/rG27c4777f41d2ab204c1cf84ff1cccd5ba41354da#1190273).
To fix this issue, llvm-addr2line now exits if the
executable/relocatable file cannot be found.

It fixes https://github.com/llvm/llvm-project/issues/42099 (llvm-addr2line
does not exit when passed a non-existent file).

Differential Revision: https://reviews.llvm.org/D147652

Added: 
    llvm/test/tools/llvm-symbolizer/errors.test

Modified: 
    llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
    llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
    llvm/test/tools/llvm-symbolizer/input-base.test
    llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index c633c894a44e9..6f4e48dc6a8f5 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -119,6 +119,8 @@ class LLVMSymbolizer {
     BIDFetcher = std::move(Fetcher);
   }
 
+  Error checkFileExists(StringRef Name);
+
 private:
   // Bundles together object file with code/data and object file with
   // corresponding debug info. These objects can be the same.
@@ -188,6 +190,11 @@ class LLVMSymbolizer {
   /// Update the LRU cache order when a binary is accessed.
   void recordAccess(CachedBinary &Bin);
 
+  /// Split binary file name into file and architecture parts. For example,
+  /// the name 'macho-universal:i386' will be split into 'macho-universal' and
+  /// 'i386'.
+  std::pair<std::string, std::string> splitBinaryFileName(StringRef Name);
+
   std::map<std::string, std::unique_ptr<SymbolizableModule>, std::less<>>
       Modules;
   StringMap<std::string> BuildIDPaths;

diff  --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 459baf7b83640..7de72a5970d91 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -550,19 +550,24 @@ LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
   return InsertResult.first->second.get();
 }
 
-Expected<SymbolizableModule *>
-LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
-  std::string BinaryName = ModuleName;
+std::pair<std::string, std::string>
+LLVMSymbolizer::splitBinaryFileName(StringRef Name) {
+  StringRef BinaryName = Name;
   std::string ArchName = Opts.DefaultArch;
-  size_t ColonPos = ModuleName.find_last_of(':');
-  // Verify that substring after colon form a valid arch name.
-  if (ColonPos != std::string::npos) {
-    std::string ArchStr = ModuleName.substr(ColonPos + 1);
+  size_t ColonPos = Name.find_last_of(':');
+  if (ColonPos != StringRef::npos) {
+    StringRef ArchStr = Name.substr(ColonPos + 1);
     if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
-      BinaryName = ModuleName.substr(0, ColonPos);
+      BinaryName = Name.substr(0, ColonPos);
       ArchName = ArchStr;
     }
   }
+  return std::make_pair(BinaryName.str(), ArchName);
+}
+
+Expected<SymbolizableModule *>
+LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
+  auto [BinaryName, ArchName] = splitBinaryFileName(ModuleName);
 
   auto I = Modules.find(ModuleName);
   if (I != Modules.end()) {
@@ -707,6 +712,15 @@ LLVMSymbolizer::DemangleName(const std::string &Name,
   return Name;
 }
 
+Error LLVMSymbolizer::checkFileExists(StringRef BinName) {
+  auto [BinaryName, ArchName] = splitBinaryFileName(BinName);
+  sys::fs::file_status Stat;
+  std::error_code EC = sys::fs::status(BinaryName, Stat);
+  if (!EC && sys::fs::is_directory(Stat))
+    EC = errc::is_a_directory;
+  return errorCodeToError(EC);
+}
+
 void LLVMSymbolizer::recordAccess(CachedBinary &Bin) {
   if (Bin->getBinary())
     LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator());

diff  --git a/llvm/test/tools/llvm-symbolizer/errors.test b/llvm/test/tools/llvm-symbolizer/errors.test
new file mode 100644
index 0000000000000..e78d1f6ef2ee3
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/errors.test
@@ -0,0 +1,7 @@
+RUN: not llvm-addr2line -e %p/Inputs/nonexistent 0x12 2>&1 | FileCheck %s --check-prefix=CHECK-NONEXISTENT-A2L -DMSG=%errc_ENOENT
+RUN: not llvm-addr2line -e %p/Inputs/nonexistent 2>&1 | FileCheck %s --check-prefix=CHECK-NONEXISTENT-A2L -DMSG=%errc_ENOENT
+CHECK-NONEXISTENT-A2L: llvm-addr2line{{.*}}: error: '{{.*}}Inputs/nonexistent': [[MSG]]
+
+RUN: not llvm-addr2line -e %p/Inputs 0x12 2>&1 | FileCheck %s --check-prefix=CHECK-DIRECTORY-A2L -DMSG=%errc_EISDIR
+RUN: not llvm-addr2line -e %p/Inputs 2>&1 | FileCheck %s --check-prefix=CHECK-DIRECTORY-A2L -DMSG=%errc_EISDIR
+CHECK-DIRECTORY-A2L: llvm-addr2line{{.*}}: error: '{{.*}}Inputs': [[MSG]]

diff  --git a/llvm/test/tools/llvm-symbolizer/input-base.test b/llvm/test/tools/llvm-symbolizer/input-base.test
index 66244a7203c08..82a5f1131dbfa 100644
--- a/llvm/test/tools/llvm-symbolizer/input-base.test
+++ b/llvm/test/tools/llvm-symbolizer/input-base.test
@@ -11,14 +11,14 @@ RUN: llvm-symbolizer -e /dev/null -a 0o11064 | FileCheck %s
 RUN: llvm-symbolizer -e /dev/null -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER
 
 # llvm-addr2line always requires hexadecimal, but accepts an optional 0x prefix.
-RUN: llvm-addr2line -e /dev/null -a 0x1234 | FileCheck %s
-RUN: llvm-addr2line -e /dev/null -a 0X1234 | FileCheck %s
-RUN: llvm-addr2line -e /dev/null -a 1234 | FileCheck %s
-RUN: llvm-addr2line -e /dev/null -a 01234 | FileCheck %s
-RUN: llvm-addr2line -e /dev/null -a 0b1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY
-RUN: llvm-addr2line -e /dev/null -a 0B1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY
-RUN: llvm-addr2line -e /dev/null -a 0o1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-LOWER
-RUN: llvm-addr2line -e /dev/null -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER
+RUN: llvm-addr2line -e %p/Inputs/addr.exe -a 0x1234 | FileCheck %s
+RUN: llvm-addr2line -e %p/Inputs/addr.exe -a 0X1234 | FileCheck %s
+RUN: llvm-addr2line -e %p/Inputs/addr.exe -a 1234 | FileCheck %s
+RUN: llvm-addr2line -e %p/Inputs/addr.exe -a 01234 | FileCheck %s
+RUN: llvm-addr2line -e %p/Inputs/addr.exe -a 0b1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY
+RUN: llvm-addr2line -e %p/Inputs/addr.exe -a 0B1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY
+RUN: llvm-addr2line -e %p/Inputs/addr.exe -a 0o1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-LOWER
+RUN: llvm-addr2line -e %p/Inputs/addr.exe -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER
 
 CHECK: 0x1234
 CHECK-NEXT: ??

diff  --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index bcbf2847d5292..7b49ec2f13808 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -478,6 +478,17 @@ int main(int argc, char **argv) {
   else
     Printer = std::make_unique<LLVMPrinter>(outs(), printError, Config);
 
+  StringRef InputFile = Args.getLastArgValue(OPT_obj_EQ);
+  if (!InputFile.empty() && IsAddr2Line) {
+    Error Status = Symbolizer.checkFileExists(InputFile);
+    if (Status) {
+      handleAllErrors(std::move(Status), [&](const ErrorInfoBase &EI) {
+        printError(EI, InputFile);
+      });
+      return EXIT_FAILURE;
+    }
+  }
+
   std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
   if (InputAddresses.empty()) {
     const int kMaxInputStringLength = 1024;


        


More information about the llvm-commits mailing list