[llvm] r338207 - [MS Demangler] Refactor some of the name parsing code.

Zachary Turner via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 28 15:10:43 PDT 2018


Author: zturner
Date: Sat Jul 28 15:10:42 2018
New Revision: 338207

URL: http://llvm.org/viewvc/llvm-project?rev=338207&view=rev
Log:
[MS Demangler] Refactor some of the name parsing code.

There are some very subtle differences between how one should
parse symbol names and type names.  They differ with respect
to back-referencing, the set of legal values that can appear
as the unqualified portion, and various other aspects.

Splitting the parsing code into distinct paths removes a lot of
ambiguity from the demangling process. Removing that ambiguity
is necessary for demangling more complicated constructs such as
function-local statics, nested classes, and lambdas.

Modified:
    llvm/trunk/lib/Demangle/MicrosoftDemangle.cpp

Modified: llvm/trunk/lib/Demangle/MicrosoftDemangle.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Demangle/MicrosoftDemangle.cpp?rev=338207&r1=338206&r2=338207&view=diff
==============================================================================
--- llvm/trunk/lib/Demangle/MicrosoftDemangle.cpp (original)
+++ llvm/trunk/lib/Demangle/MicrosoftDemangle.cpp Sat Jul 28 15:10:42 2018
@@ -420,8 +420,10 @@ static void outputName(OutputStream &OS,
 
   outputSpaceIfNecessary(OS);
 
+  const Name *Previous = nullptr;
   // Print out namespaces or outer class BackReferences.
   for (; TheName->Next; TheName = TheName->Next) {
+    Previous = TheName;
     OS << TheName->Str;
     outputTemplateParams(OS, *TheName);
     OS << "::";
@@ -435,14 +437,12 @@ static void outputName(OutputStream &OS,
   }
 
   // Print out ctor or dtor.
+  if (TheName->Operator == "dtor")
+    OS << "~";
+
   if (TheName->Operator == "ctor" || TheName->Operator == "dtor") {
-    OS << TheName->Str;
-    outputTemplateParams(OS, *TheName);
-    OS << "::";
-    if (TheName->Operator == "dtor")
-      OS << "~";
-    OS << TheName->Str;
-    outputTemplateParams(OS, *TheName);
+    OS << Previous->Str;
+    outputTemplateParams(OS, *Previous);
     return;
   }
 
@@ -755,13 +755,24 @@ private:
   ParamList demangleFunctionParameterList();
 
   int demangleNumber();
-  void demangleNamePiece(Name &Node, bool IsHead);
 
-  StringView demangleString(bool memorize);
   void memorizeString(StringView s);
-  Name *demangleName();
+  Name *demangleFullyQualifiedTypeName();
+  Name *demangleFullyQualifiedSymbolName();
+
+  Name *demangleUnqualifiedTypeName();
+  Name *demangleUnqualifiedSymbolName();
+
+  Name *demangleNameScopeChain(Name *UnqualifiedName);
+  Name *demangleNameScopePiece();
+
+  Name *demangleBackRefName();
+  Name *demangleClassTemplateName();
+  Name *demangleOperatorName();
+  Name *demangleSimpleName(bool Memorize);
+  Name *demangleAnonymousNamespaceName();
+
   void demangleOperator(Name *);
-  StringView demangleOperatorName();
   FuncClass demangleFunctionClass();
   CallingConv demangleCallingConvention();
   StorageClass demangleVariableStorageClass();
@@ -821,7 +832,7 @@ void Demangler::parse() {
 
   // What follows is a main symbol name. This may include
   // namespaces or class BackReferences.
-  SymbolName = demangleName();
+  SymbolName = demangleFullyQualifiedSymbolName();
 
   // Read a variable.
   if (startsWithDigit(MangledName)) {
@@ -861,7 +872,7 @@ Type *Demangler::demangleVariableEncodin
 
     if (Ty->Prim == PrimTy::MemberPtr) {
       assert(IsMember);
-      Name *BackRefName = demangleName();
+      Name *BackRefName = demangleFullyQualifiedTypeName();
       (void)BackRefName;
       MemberPointerType *MPTy = static_cast<MemberPointerType *>(Ty);
       MPTy->Pointee->Quals = Qualifiers(MPTy->Pointee->Quals | ExtraChildQuals);
@@ -918,23 +929,6 @@ int Demangler::demangleNumber() {
   return 0;
 }
 
-// Read until the next '@'.
-StringView Demangler::demangleString(bool Memorize) {
-  for (size_t i = 0; i < MangledName.size(); ++i) {
-    if (MangledName[i] != '@')
-      continue;
-    StringView ret = MangledName.substr(0, i);
-    MangledName = MangledName.dropFront(i + 1);
-
-    if (Memorize)
-      memorizeString(ret);
-    return ret;
-  }
-
-  Error = true;
-  return "";
-}
-
 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
 // Memorize it.
 void Demangler::memorizeString(StringView S) {
@@ -946,176 +940,247 @@ void Demangler::memorizeString(StringVie
   BackReferences[BackRefCount++] = S;
 }
 
-void Demangler::demangleNamePiece(Name &Node, bool IsHead) {
-  if (startsWithDigit(MangledName)) {
-    size_t I = MangledName[0] - '0';
-    if (I >= BackRefCount) {
-      Error = true;
-      return;
-    }
-    MangledName = MangledName.dropFront();
-    Node.Str = BackReferences[I];
-  } else if (MangledName.consumeFront("?$")) {
-    // Class template.
-    Node.Str = demangleString(false);
-    Node.TemplateParams = demangleTemplateParameterList();
-  } else if (!IsHead && MangledName.consumeFront("?A")) {
-    // Anonymous namespace starts with ?A.  So does overloaded operator[],
-    // but the distinguishing factor is that namespace themselves are not
-    // mangled, only the variables and functions inside of them are.  So
-    // an anonymous namespace will never occur as the first item in the
-    // name.
-    Node.Str = "`anonymous namespace'";
-    if (!MangledName.consumeFront('@')) {
-      Error = true;
-      return;
-    }
-  } else if (MangledName.consumeFront("?")) {
-    // Overloaded operator.
-    demangleOperator(&Node);
-  } else {
-    // Non-template functions or classes.
-    Node.Str = demangleString(true);
-  }
-}
+Name *Demangler::demangleBackRefName() {
+  assert(startsWithDigit(MangledName));
 
-// Parses a name in the form of A@B@C@@ which represents C::B::A.
-Name *Demangler::demangleName() {
-  Name *Head = nullptr;
-
-  while (!MangledName.consumeFront("@")) {
-    Name *Elem = Arena.alloc<Name>();
-
-    assert(!Error);
-    demangleNamePiece(*Elem, Head == nullptr);
-    if (Error)
-      return nullptr;
-
-    Elem->Next = Head;
-    Head = Elem;
-    if (MangledName.empty()) {
-      Error = true;
-      return nullptr;
-    }
+  size_t I = MangledName[0] - '0';
+  if (I >= BackRefCount) {
+    Error = true;
+    return nullptr;
   }
 
-  return Head;
+  MangledName = MangledName.dropFront();
+  Name *Node = Arena.alloc<Name>();
+  Node->Str = BackReferences[I];
+  return Node;
 }
 
-void Demangler::demangleOperator(Name *OpName) {
-  OpName->Operator = demangleOperatorName();
-  if (!Error && !MangledName.empty() && MangledName.front() != '@')
-    demangleNamePiece(*OpName, false);
+Name *Demangler::demangleClassTemplateName() {
+  assert(MangledName.startsWith("?$"));
+  MangledName.consumeFront("?$");
+
+  Name *Node = demangleSimpleName(false);
+  Node->TemplateParams = demangleTemplateParameterList();
+  return Node;
 }
 
-StringView Demangler::demangleOperatorName() {
-  SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName);
-  RestoreOnError.shouldRestore(false);
-
-  switch (MangledName.popFront()) {
-  case '0':
-    return "ctor";
-  case '1':
-    return "dtor";
-  case '2':
-    return " new";
-  case '3':
-    return " delete";
-  case '4':
-    return "=";
-  case '5':
-    return ">>";
-  case '6':
-    return "<<";
-  case '7':
-    return "!";
-  case '8':
-    return "==";
-  case '9':
-    return "!=";
-  case 'A':
-    return "[]";
-  case 'C':
-    return "->";
-  case 'D':
-    return "*";
-  case 'E':
-    return "++";
-  case 'F':
-    return "--";
-  case 'G':
-    return "-";
-  case 'H':
-    return "+";
-  case 'I':
-    return "&";
-  case 'J':
-    return "->*";
-  case 'K':
-    return "/";
-  case 'L':
-    return "%";
-  case 'M':
-    return "<";
-  case 'N':
-    return "<=";
-  case 'O':
-    return ">";
-  case 'P':
-    return ">=";
-  case 'Q':
-    return ",";
-  case 'R':
-    return "()";
-  case 'S':
-    return "~";
-  case 'T':
-    return "^";
-  case 'U':
-    return "|";
-  case 'V':
-    return "&&";
-  case 'W':
-    return "||";
-  case 'X':
-    return "*=";
-  case 'Y':
-    return "+=";
-  case 'Z':
-    return "-=";
-  case '_': {
-    if (MangledName.empty())
-      break;
+Name *Demangler::demangleOperatorName() {
+  assert(MangledName.startsWith('?'));
+  MangledName.consumeFront('?');
 
+  auto NameString = [this]() -> StringView {
     switch (MangledName.popFront()) {
     case '0':
-      return "/=";
+      return "ctor";
     case '1':
-      return "%=";
+      return "dtor";
     case '2':
-      return ">>=";
+      return " new";
     case '3':
-      return "<<=";
+      return " delete";
     case '4':
-      return "&=";
+      return "=";
     case '5':
-      return "|=";
+      return ">>";
     case '6':
-      return "^=";
+      return "<<";
+    case '7':
+      return "!";
+    case '8':
+      return "==";
+    case '9':
+      return "!=";
+    case 'A':
+      return "[]";
+    case 'C':
+      return "->";
+    case 'D':
+      return "*";
+    case 'E':
+      return "++";
+    case 'F':
+      return "--";
+    case 'G':
+      return "-";
+    case 'H':
+      return "+";
+    case 'I':
+      return "&";
+    case 'J':
+      return "->*";
+    case 'K':
+      return "/";
+    case 'L':
+      return "%";
+    case 'M':
+      return "<";
+    case 'N':
+      return "<=";
+    case 'O':
+      return ">";
+    case 'P':
+      return ">=";
+    case 'Q':
+      return ",";
+    case 'R':
+      return "()";
+    case 'S':
+      return "~";
+    case 'T':
+      return "^";
     case 'U':
-      return " new[]";
+      return "|";
     case 'V':
-      return " delete[]";
-    case '_':
-      if (MangledName.consumeFront("L"))
-        return " co_await";
+      return "&&";
+    case 'W':
+      return "||";
+    case 'X':
+      return "*=";
+    case 'Y':
+      return "+=";
+    case 'Z':
+      return "-=";
+    case '_': {
+      if (MangledName.empty())
+        break;
+
+      switch (MangledName.popFront()) {
+      case '0':
+        return "/=";
+      case '1':
+        return "%=";
+      case '2':
+        return ">>=";
+      case '3':
+        return "<<=";
+      case '4':
+        return "&=";
+      case '5':
+        return "|=";
+      case '6':
+        return "^=";
+      case 'U':
+        return " new[]";
+      case 'V':
+        return " delete[]";
+      case '_':
+        if (MangledName.consumeFront("L"))
+          return " co_await";
+      }
     }
-  }
+    }
+    Error = true;
+    return "";
+  };
+
+  Name *Node = Arena.alloc<Name>();
+  Node->Operator = NameString();
+  return Node;
+}
+
+Name *Demangler::demangleSimpleName(bool Memorize) {
+  Name *Node = Arena.alloc<Name>();
+  for (size_t i = 0; i < MangledName.size(); ++i) {
+    if (MangledName[i] != '@')
+      continue;
+    Node->Str = MangledName.substr(0, i);
+    MangledName = MangledName.dropFront(i + 1);
+
+    if (Memorize)
+      memorizeString(Node->Str);
+    return Node;
   }
 
   Error = true;
-  RestoreOnError.shouldRestore(true);
-  return "";
+  return nullptr;
+}
+
+Name *Demangler::demangleAnonymousNamespaceName() {
+  assert(MangledName.startsWith("?A"));
+  MangledName.consumeFront("?A");
+
+  Name *Node = Arena.alloc<Name>();
+  Node->Str = "`anonymous namespace'";
+  if (MangledName.consumeFront('@'))
+    return Node;
+
+  Error = true;
+  return nullptr;
+}
+
+// Parses a type name in the form of A@B@C@@ which represents C::B::A.
+Name *Demangler::demangleFullyQualifiedTypeName() {
+  Name *TypeName = demangleUnqualifiedTypeName();
+  assert(TypeName);
+
+  Name *QualName = demangleNameScopeChain(TypeName);
+  assert(QualName);
+  return QualName;
+}
+
+// Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
+// Symbol names have slightly different rules regarding what can appear
+// so we separate out the implementations for flexibility.
+Name *Demangler::demangleFullyQualifiedSymbolName() {
+  Name *SymbolName = demangleUnqualifiedSymbolName();
+  assert(SymbolName);
+
+  Name *QualName = demangleNameScopeChain(SymbolName);
+  assert(QualName);
+  return QualName;
+}
+
+Name *Demangler::demangleUnqualifiedTypeName() {
+  // An inner-most name can be a back-reference, because a fully-qualified name
+  // (e.g. Scope + Inner) can contain other fully qualified names inside of
+  // them (for example template parameters), and these nested parameters can
+  // refer to previously mangled types.
+  if (startsWithDigit(MangledName))
+    return demangleBackRefName();
+
+  if (MangledName.startsWith("?$"))
+    return demangleClassTemplateName();
+
+  return demangleSimpleName(true);
+}
+
+Name *Demangler::demangleUnqualifiedSymbolName() {
+  if (MangledName.startsWith('?'))
+    return demangleOperatorName();
+  return demangleSimpleName(true);
+}
+
+Name *Demangler::demangleNameScopePiece() {
+  if (startsWithDigit(MangledName))
+    return demangleBackRefName();
+
+  if (MangledName.startsWith("?$"))
+    return demangleClassTemplateName();
+
+  if (MangledName.startsWith("?A"))
+    return demangleAnonymousNamespaceName();
+
+  return demangleSimpleName(true);
+}
+
+Name *Demangler::demangleNameScopeChain(Name *UnqualifiedName) {
+  Name *Head = UnqualifiedName;
+
+  while (!MangledName.consumeFront("@")) {
+    if (MangledName.empty()) {
+      Error = true;
+      return nullptr;
+    }
+
+    assert(!Error);
+    Name *Elem = demangleNameScopePiece();
+    if (Error)
+      return nullptr;
+
+    Elem->Next = Head;
+    Head = Elem;
+  }
+  return Head;
 }
 
 FuncClass Demangler::demangleFunctionClass() {
@@ -1440,7 +1505,7 @@ UdtType *Demangler::demangleClassType()
     assert(false);
   }
 
-  UTy->UdtName = demangleName();
+  UTy->UdtName = demangleFullyQualifiedTypeName();
   return UTy;
 }
 
@@ -1498,14 +1563,14 @@ MemberPointerType *Demangler::demangleMe
   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
 
   if (MangledName.consumeFront("8")) {
-    Pointer->MemberName = demangleName();
+    Pointer->MemberName = demangleFullyQualifiedSymbolName();
     Pointer->Pointee = demangleFunctionType(true, true);
   } else {
     Qualifiers PointeeQuals = Q_None;
     bool IsMember = false;
     std::tie(PointeeQuals, IsMember) = demangleQualifiers();
     assert(IsMember);
-    Pointer->MemberName = demangleName();
+    Pointer->MemberName = demangleFullyQualifiedSymbolName();
 
     Pointer->Pointee = demangleType(QualifierMangleMode::Drop);
     Pointer->Pointee->Quals = PointeeQuals;




More information about the llvm-commits mailing list