[llvm] r244512 - [llvm-symbolizer] Remove underscores and other C mangling on Windows

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 10 14:47:11 PDT 2015


Author: rnk
Date: Mon Aug 10 16:47:11 2015
New Revision: 244512

URL: http://llvm.org/viewvc/llvm-project?rev=244512&view=rev
Log:
[llvm-symbolizer] Remove underscores and other C mangling on Windows

Summary:
This makes it so that reports symbolized after the fact with
llvm-symbolizer are more similar to the ones we generate at runtime with
in-process dbghelp.

Reviewers: samsonov

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D11785

Modified:
    llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp
    llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.exe
    llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
    llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb
    llvm/trunk/test/tools/llvm-symbolizer/pdb/pdb.test
    llvm/trunk/tools/llvm-symbolizer/LLVMSymbolize.cpp
    llvm/trunk/tools/llvm-symbolizer/LLVMSymbolize.h

Modified: llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp?rev=244512&r1=244511&r2=244512&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp (original)
+++ llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp Mon Aug 10 16:47:11 2015
@@ -16,3 +16,10 @@ int main() {
   NS::Foo f;
   f.bar();
 }
+
+extern "C" {
+void __cdecl foo_cdecl() {}
+void __stdcall foo_stdcall() {}
+void __fastcall foo_fastcall() {}
+void __vectorcall foo_vectorcall() {}
+}

Modified: llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.exe
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.exe?rev=244512&r1=244511&r2=244512&view=diff
==============================================================================
Binary files llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.exe (original) and llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.exe Mon Aug 10 16:47:11 2015 differ

Modified: llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input?rev=244512&r1=244511&r2=244512&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input (original)
+++ llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input Mon Aug 10 16:47:11 2015
@@ -1,4 +1,8 @@
+0x401000
+0x401010
+0x401070
 0x401030
 0x401040
+0x401050
 0x401060
 0x500000

Modified: llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb?rev=244512&r1=244511&r2=244512&view=diff
==============================================================================
Binary files llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb (original) and llvm/trunk/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb Mon Aug 10 16:47:11 2015 differ

Modified: llvm/trunk/test/tools/llvm-symbolizer/pdb/pdb.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-symbolizer/pdb/pdb.test?rev=244512&r1=244511&r2=244512&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-symbolizer/pdb/pdb.test (original)
+++ llvm/trunk/test/tools/llvm-symbolizer/pdb/pdb.test Mon Aug 10 16:47:11 2015
@@ -1,18 +1,26 @@
-RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" < "%p/Inputs/test.exe.input" | \
-RUN:    FileCheck %s --check-prefix=CHECK
-RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false < \
-RUN:    "%p/Inputs/test.exe.input" | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
-
-CHECK: foo(void)
-CHECK-NEXT: test.cpp:10
-CHECK: _main
-CHECK-NEXT: test.cpp:13:0
-CHECK: NS::Foo::bar(void)
-CHECK-NEXT: test.cpp:6:0
-
-CHECK-NO-DEMANGLE: foo
-CHECK-NO-DEMANGLE-NEXT: test.cpp:10
-CHECK-NO-DEMANGLE: _main
-CHECK-LINKAGE-NAME-NEXT: test.cpp:13:0
-CHECK-NO-DEMANGLE: bar
-CHECK-LINKAGE-NAME-NEXT: test.cpp:6:0
+RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" < "%p/Inputs/test.exe.input" | \
+RUN:    FileCheck %s --check-prefix=CHECK
+RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false < \
+RUN:    "%p/Inputs/test.exe.input" | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
+
+CHECK: foo(void)
+CHECK-NEXT: test.cpp:10
+CHECK: main
+CHECK-NEXT: test.cpp:13:0
+CHECK: NS::Foo::bar(void)
+CHECK-NEXT: test.cpp:6:0
+CHECK: {{^foo_cdecl$}}
+CHECK: {{^foo_stdcall$}}
+CHECK: {{^foo_fastcall$}}
+CHECK: {{^foo_vectorcall$}}
+
+CHECK-NO-DEMANGLE: ?foo@@YAXXZ
+CHECK-NO-DEMANGLE-NEXT: test.cpp:10
+CHECK-NO-DEMANGLE: _main
+CHECK-NO-DEMANGLE-NEXT: test.cpp:13
+CHECK-NO-DEMANGLE: ?bar@Foo@NS@@QAEXXZ
+CHECK-NO-DEMANGLE-NEXT: test.cpp:6
+CHECK-NO-DEMANGLE: _foo_cdecl
+CHECK-NO-DEMANGLE: _foo_stdcall@0
+CHECK-NO-DEMANGLE: @foo_fastcall@0
+CHECK-NO-DEMANGLE: foo_vectorcall@@0

Modified: llvm/trunk/tools/llvm-symbolizer/LLVMSymbolize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-symbolizer/LLVMSymbolize.cpp?rev=244512&r1=244511&r2=244512&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-symbolizer/LLVMSymbolize.cpp (original)
+++ llvm/trunk/tools/llvm-symbolizer/LLVMSymbolize.cpp Mon Aug 10 16:47:11 2015
@@ -20,6 +20,7 @@
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/COFF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/DataExtractor.h"
@@ -34,6 +35,11 @@
 #include <Windows.h>
 #include <DbgHelp.h>
 #pragma comment(lib, "dbghelp.lib")
+
+// Windows.h conflicts with our COFF header definitions.
+#ifdef IMAGE_FILE_MACHINE_I386
+#undef IMAGE_FILE_MACHINE_I386
+#endif
 #endif
 
 namespace llvm {
@@ -114,6 +120,12 @@ void ModuleInfo::addSymbol(const SymbolR
   M.insert(std::make_pair(SD, SymbolName));
 }
 
+// Return true if this is a 32-bit x86 PE COFF module.
+bool ModuleInfo::isWin32Module() const {
+  auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
+  return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
+}
+
 bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
                                         std::string &Name, uint64_t &Addr,
                                         uint64_t &Size) const {
@@ -197,7 +209,7 @@ std::string LLVMSymbolizer::symbolizeCod
                                           uint64_t ModuleOffset) {
   ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
   if (!Info)
-    return printDILineInfo(DILineInfo());
+    return printDILineInfo(DILineInfo(), Info);
   if (Opts.PrintInlining) {
     DIInliningInfo InlinedContext =
         Info->symbolizeInlinedCode(ModuleOffset, Opts);
@@ -206,12 +218,12 @@ std::string LLVMSymbolizer::symbolizeCod
     std::string Result;
     for (uint32_t i = 0; i < FramesNum; i++) {
       DILineInfo LineInfo = InlinedContext.getFrame(i);
-      Result += printDILineInfo(LineInfo);
+      Result += printDILineInfo(LineInfo, Info);
     }
     return Result;
   }
   DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts);
-  return printDILineInfo(LineInfo);
+  return printDILineInfo(LineInfo, Info);
 }
 
 std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
@@ -222,7 +234,7 @@ std::string LLVMSymbolizer::symbolizeDat
   if (Opts.UseSymbolTable) {
     if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) {
       if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle)
-        Name = DemangleName(Name);
+        Name = DemangleName(Name, Info);
     }
   }
   std::stringstream ss;
@@ -474,7 +486,8 @@ LLVMSymbolizer::getOrCreateModuleInfo(co
   return Info;
 }
 
-std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
+std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo,
+                                            ModuleInfo *ModInfo) const {
   // By default, DILineInfo contains "<invalid>" for function/filename it
   // cannot fetch. We replace it to "??" to make our output closer to addr2line.
   static const std::string kDILineInfoBadString = "<invalid>";
@@ -484,7 +497,7 @@ std::string LLVMSymbolizer::printDILineI
     if (FunctionName == kDILineInfoBadString)
       FunctionName = kBadString;
     else if (Opts.Demangle)
-      FunctionName = DemangleName(FunctionName);
+      FunctionName = DemangleName(FunctionName, ModInfo);
     Result << FunctionName << "\n";
   }
   std::string Filename = LineInfo.FileName;
@@ -494,38 +507,73 @@ std::string LLVMSymbolizer::printDILineI
   return Result.str();
 }
 
+// Undo these various manglings for Win32 extern "C" functions:
+// cdecl       - _foo
+// stdcall     - _foo@12
+// fastcall    - @foo@12
+// vectorcall  - foo@@12
+// These are all different linkage names for 'foo'.
+static StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
+  // Remove any '_' or '@' prefix.
+  char Front = SymbolName.empty() ? '\0' : SymbolName[0];
+  if (Front == '_' || Front == '@')
+    SymbolName = SymbolName.drop_front();
+
+  // Remove any '@[0-9]+' suffix.
+  if (Front != '?') {
+    size_t AtPos = SymbolName.rfind('@');
+    if (AtPos != StringRef::npos &&
+        std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(),
+                    [](char C) { return C >= '0' && C <= '9'; })) {
+      SymbolName = SymbolName.substr(0, AtPos);
+    }
+  }
+
+  // Remove any ending '@' for vectorcall.
+  if (SymbolName.endswith("@"))
+    SymbolName = SymbolName.drop_back();
+
+  return SymbolName;
+}
+
 #if !defined(_MSC_VER)
 // Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
 extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
                                 size_t *length, int *status);
 #endif
 
-std::string LLVMSymbolizer::DemangleName(const std::string &Name) {
+std::string LLVMSymbolizer::DemangleName(const std::string &Name,
+                                         ModuleInfo *ModInfo) {
 #if !defined(_MSC_VER)
   // We can spoil names of symbols with C linkage, so use an heuristic
   // approach to check if the name should be demangled.
-  if (Name.substr(0, 2) != "_Z")
-    return Name;
-  int status = 0;
-  char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
-  if (status != 0)
-    return Name;
-  std::string Result = DemangledName;
-  free(DemangledName);
-  return Result;
+  if (Name.substr(0, 2) == "_Z") {
+    int status = 0;
+    char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
+    if (status != 0)
+      return Name;
+    std::string Result = DemangledName;
+    free(DemangledName);
+    return Result;
+  }
 #else
-  char DemangledName[1024] = {0};
-  DWORD result = ::UnDecorateSymbolName(
-      Name.c_str(), DemangledName, 1023,
-      UNDNAME_NO_ACCESS_SPECIFIERS |       // Strip public, private, protected
-          UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
-          UNDNAME_NO_THROW_SIGNATURES |    // Strip throw() specifications
-          UNDNAME_NO_MEMBER_TYPE |      // Strip virtual, static, etc specifiers
-          UNDNAME_NO_MS_KEYWORDS |      // Strip all MS extension keywords
-          UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
-
-  return (result == 0) ? Name : std::string(DemangledName);
+  if (!Name.empty() && Name.front() == '?') {
+    // Only do MSVC C++ demangling on symbols starting with '?'.
+    char DemangledName[1024] = {0};
+    DWORD result = ::UnDecorateSymbolName(
+        Name.c_str(), DemangledName, 1023,
+        UNDNAME_NO_ACCESS_SPECIFIERS |       // Strip public, private, protected
+            UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
+            UNDNAME_NO_THROW_SIGNATURES |    // Strip throw() specifications
+            UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers
+            UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords
+            UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
+    return (result == 0) ? Name : std::string(DemangledName);
+  }
 #endif
+  if (ModInfo->isWin32Module())
+    return std::string(demanglePE32ExternCFunc(Name));
+  return Name;
 }
 
 } // namespace symbolize

Modified: llvm/trunk/tools/llvm-symbolizer/LLVMSymbolize.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-symbolizer/LLVMSymbolize.h?rev=244512&r1=244511&r2=244512&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-symbolizer/LLVMSymbolize.h (original)
+++ llvm/trunk/tools/llvm-symbolizer/LLVMSymbolize.h Mon Aug 10 16:47:11 2015
@@ -63,7 +63,8 @@ public:
   std::string
   symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset);
   void flush();
-  static std::string DemangleName(const std::string &Name);
+  static std::string DemangleName(const std::string &Name, ModuleInfo *ModInfo);
+
 private:
   typedef std::pair<ObjectFile*, ObjectFile*> ObjectPair;
 
@@ -78,7 +79,7 @@ private:
   /// universal binary (or the binary itself if it is an object file).
   ObjectFile *getObjectFileFromBinary(Binary *Bin, const std::string &ArchName);
 
-  std::string printDILineInfo(DILineInfo LineInfo) const;
+  std::string printDILineInfo(DILineInfo LineInfo, ModuleInfo *ModInfo) const;
 
   // Owns all the parsed binaries and object files.
   SmallVector<std::unique_ptr<Binary>, 4> ParsedBinariesAndObjects;
@@ -113,6 +114,9 @@ public:
   bool symbolizeData(uint64_t ModuleOffset, std::string &Name, uint64_t &Start,
                      uint64_t &Size) const;
 
+  // Return true if this is a 32-bit x86 PE COFF module.
+  bool isWin32Module() const;
+
 private:
   bool getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
                               std::string &Name, uint64_t &Addr,




More information about the llvm-commits mailing list