[llvm] 6184e56 - [demangler][NFC] Refactor some parsing

Nathan Sidwell via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 24 05:31:20 PST 2022


Author: Nathan Sidwell
Date: 2022-01-24T05:28:38-08:00
New Revision: 6184e565ad4065026bc121fd13d6a8743a1fe593

URL: https://github.com/llvm/llvm-project/commit/6184e565ad4065026bc121fd13d6a8743a1fe593
DIFF: https://github.com/llvm/llvm-project/commit/6184e565ad4065026bc121fd13d6a8743a1fe593.diff

LOG: [demangler][NFC] Refactor some parsing

There's some unnecessary code duplication in the parser.  This
refactors that and deploys boolean variables to avoid the duplication.
These also happen to help adding module demangling (with an updated
mangling scheme).

1a) The grammar requires some lookahead concerning <template-args>. We
may discover an <unscoped-name> is actually <unscoped-template-name>
<template-args>.  (When <unscoped-name> was a substitution, there must
be a following <template-args>.)  Refactor parseName to only have one
code path looking for the 'I' indicating <template-args>.

1b) While there I altered the control flow to hold the result in a
variable, rather than tail call.  Made it easier to debug (and of
course an optimizer will DTRT here anyway).

2a) An <unscoped-name> can have an St or StL prefix.  No need for
completely separate code paths handling the following unqualified-name
though.

2b) Also no need to look for both 'St' and 'StL' separately.  Look for
'St' and then conditionally swallow an 'L'.

3) We get a similar issue as #1a when parsing a typeName.  Here I just
change the control flow slightly to bring the 'break' out to the end
of the 'S' block and embed the early return inside an if.  That's more
in keeping with the code style.

4) Although NFC, there's a new testcase as that's not covered by the
existing demangler tests and is significant in the #1a case above.

Reviewed By: ChuanqiXu

Differential Revision: https://reviews.llvm.org/D117879

Added: 
    

Modified: 
    libcxxabi/src/demangle/ItaniumDemangle.h
    libcxxabi/test/test_demangle.pass.cpp
    llvm/include/llvm/Demangle/ItaniumDemangle.h

Removed: 
    


################################################################################
diff  --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index 01f414a7257bf..45bb741f5629f 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -2593,34 +2593,38 @@ Node *AbstractManglingParser<Derived, Alloc>::parseName(NameState *State) {
   if (look() == 'Z')
     return getDerived().parseLocalName(State);
 
-  //        ::= <unscoped-template-name> <template-args>
-  if (look() == 'S' && look(1) != 't') {
-    Node *S = getDerived().parseSubstitution();
-    if (S == nullptr)
-      return nullptr;
-    if (look() != 'I')
-      return nullptr;
-    Node *TA = getDerived().parseTemplateArgs(State != nullptr);
-    if (TA == nullptr)
-      return nullptr;
-    if (State) State->EndsWithTemplateArgs = true;
-    return make<NameWithTemplateArgs>(S, TA);
+  Node *Result = nullptr;
+  bool IsSubst = look() == 'S' && look(1) != 't';
+  if (IsSubst) {
+    // A substitution must lead to:
+    //        ::= <unscoped-template-name> <template-args>
+    Result = getDerived().parseSubstitution();
+  } else {
+    // An unscoped name can be one of:
+    //        ::= <unscoped-name>
+    //        ::= <unscoped-template-name> <template-args>
+    Result = getDerived().parseUnscopedName(State);
   }
-
-  Node *N = getDerived().parseUnscopedName(State);
-  if (N == nullptr)
+  if (Result == nullptr)
     return nullptr;
-  //        ::= <unscoped-template-name> <template-args>
+
   if (look() == 'I') {
-    Subs.push_back(N);
+    //        ::= <unscoped-template-name> <template-args>
+    if (!IsSubst)
+      // An unscoped-template-name is substitutable.
+      Subs.push_back(Result);
     Node *TA = getDerived().parseTemplateArgs(State != nullptr);
     if (TA == nullptr)
       return nullptr;
-    if (State) State->EndsWithTemplateArgs = true;
-    return make<NameWithTemplateArgs>(N, TA);
+    if (State)
+      State->EndsWithTemplateArgs = true;
+    Result = make<NameWithTemplateArgs>(Result, TA);
+  } else if (IsSubst) {
+    // The substitution case must be followed by <template-args>.
+    return nullptr;
   }
-  //        ::= <unscoped-name>
-  return N;
+
+  return Result;
 }
 
 // <local-name> := Z <function encoding> E <entity name> [<discriminator>]
@@ -2665,13 +2669,17 @@ Node *AbstractManglingParser<Derived, Alloc>::parseLocalName(NameState *State) {
 template <typename Derived, typename Alloc>
 Node *
 AbstractManglingParser<Derived, Alloc>::parseUnscopedName(NameState *State) {
-  if (consumeIf("StL") || consumeIf("St")) {
-    Node *R = getDerived().parseUnqualifiedName(State);
-    if (R == nullptr)
-      return nullptr;
-    return make<StdQualifiedName>(R);
-  }
-  return getDerived().parseUnqualifiedName(State);
+  bool IsStd = consumeIf("St");
+  if (IsStd)
+    consumeIf('L');
+
+  Node *Result = getDerived().parseUnqualifiedName(State);
+  if (Result == nullptr)
+    return nullptr;
+  if (IsStd)
+    Result = make<StdQualifiedName>(Result);
+
+  return Result;
 }
 
 // <unqualified-name> ::= <operator-name> [abi-tags]
@@ -4066,9 +4074,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
   }
   //             ::= <substitution>  # See Compression below
   case 'S': {
-    if (look(1) && look(1) != 't') {
-      Node *Sub = getDerived().parseSubstitution();
-      if (Sub == nullptr)
+    if (look(1) != 't') {
+      Result = getDerived().parseSubstitution();
+      if (Result == nullptr)
         return nullptr;
 
       // Sub could be either of:
@@ -4085,13 +4093,13 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
         Node *TA = getDerived().parseTemplateArgs();
         if (TA == nullptr)
           return nullptr;
-        Result = make<NameWithTemplateArgs>(Sub, TA);
-        break;
+        Result = make<NameWithTemplateArgs>(Result, TA);
+      } else {
+        // If all we parsed was a substitution, don't re-insert into the
+        // substitution table.
+        return Result;
       }
-
-      // If all we parsed was a substitution, don't re-insert into the
-      // substitution table.
-      return Sub;
+      break;
     }
     DEMANGLE_FALLTHROUGH;
   }

diff  --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp
index b1bc05661144a..7bbb17c581cc1 100644
--- a/libcxxabi/test/test_demangle.pass.cpp
+++ b/libcxxabi/test/test_demangle.pass.cpp
@@ -29855,6 +29855,8 @@ const char* cases[][2] =
     // This should be invalid, but it is currently not recognized as such
     // See https://llvm.org/PR51407
     {"_Zcv1BIRT_EIS1_E", "operator B<><>"},
+
+    {"_Z3TPLIiET_S0_", "int TPL<int>(int)"},
 };
 
 const unsigned N = sizeof(cases) / sizeof(cases[0]);

diff  --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 98ff73e21cca6..a3a0381edf97d 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -2595,34 +2595,38 @@ Node *AbstractManglingParser<Derived, Alloc>::parseName(NameState *State) {
   if (look() == 'Z')
     return getDerived().parseLocalName(State);
 
-  //        ::= <unscoped-template-name> <template-args>
-  if (look() == 'S' && look(1) != 't') {
-    Node *S = getDerived().parseSubstitution();
-    if (S == nullptr)
-      return nullptr;
-    if (look() != 'I')
-      return nullptr;
-    Node *TA = getDerived().parseTemplateArgs(State != nullptr);
-    if (TA == nullptr)
-      return nullptr;
-    if (State) State->EndsWithTemplateArgs = true;
-    return make<NameWithTemplateArgs>(S, TA);
+  Node *Result = nullptr;
+  bool IsSubst = look() == 'S' && look(1) != 't';
+  if (IsSubst) {
+    // A substitution must lead to:
+    //        ::= <unscoped-template-name> <template-args>
+    Result = getDerived().parseSubstitution();
+  } else {
+    // An unscoped name can be one of:
+    //        ::= <unscoped-name>
+    //        ::= <unscoped-template-name> <template-args>
+    Result = getDerived().parseUnscopedName(State);
   }
-
-  Node *N = getDerived().parseUnscopedName(State);
-  if (N == nullptr)
+  if (Result == nullptr)
     return nullptr;
-  //        ::= <unscoped-template-name> <template-args>
+
   if (look() == 'I') {
-    Subs.push_back(N);
+    //        ::= <unscoped-template-name> <template-args>
+    if (!IsSubst)
+      // An unscoped-template-name is substitutable.
+      Subs.push_back(Result);
     Node *TA = getDerived().parseTemplateArgs(State != nullptr);
     if (TA == nullptr)
       return nullptr;
-    if (State) State->EndsWithTemplateArgs = true;
-    return make<NameWithTemplateArgs>(N, TA);
+    if (State)
+      State->EndsWithTemplateArgs = true;
+    Result = make<NameWithTemplateArgs>(Result, TA);
+  } else if (IsSubst) {
+    // The substitution case must be followed by <template-args>.
+    return nullptr;
   }
-  //        ::= <unscoped-name>
-  return N;
+
+  return Result;
 }
 
 // <local-name> := Z <function encoding> E <entity name> [<discriminator>]
@@ -2667,13 +2671,17 @@ Node *AbstractManglingParser<Derived, Alloc>::parseLocalName(NameState *State) {
 template <typename Derived, typename Alloc>
 Node *
 AbstractManglingParser<Derived, Alloc>::parseUnscopedName(NameState *State) {
-  if (consumeIf("StL") || consumeIf("St")) {
-    Node *R = getDerived().parseUnqualifiedName(State);
-    if (R == nullptr)
-      return nullptr;
-    return make<StdQualifiedName>(R);
-  }
-  return getDerived().parseUnqualifiedName(State);
+  bool IsStd = consumeIf("St");
+  if (IsStd)
+    consumeIf('L');
+
+  Node *Result = getDerived().parseUnqualifiedName(State);
+  if (Result == nullptr)
+    return nullptr;
+  if (IsStd)
+    Result = make<StdQualifiedName>(Result);
+
+  return Result;
 }
 
 // <unqualified-name> ::= <operator-name> [abi-tags]
@@ -4068,9 +4076,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
   }
   //             ::= <substitution>  # See Compression below
   case 'S': {
-    if (look(1) && look(1) != 't') {
-      Node *Sub = getDerived().parseSubstitution();
-      if (Sub == nullptr)
+    if (look(1) != 't') {
+      Result = getDerived().parseSubstitution();
+      if (Result == nullptr)
         return nullptr;
 
       // Sub could be either of:
@@ -4087,13 +4095,13 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
         Node *TA = getDerived().parseTemplateArgs();
         if (TA == nullptr)
           return nullptr;
-        Result = make<NameWithTemplateArgs>(Sub, TA);
-        break;
+        Result = make<NameWithTemplateArgs>(Result, TA);
+      } else {
+        // If all we parsed was a substitution, don't re-insert into the
+        // substitution table.
+        return Result;
       }
-
-      // If all we parsed was a substitution, don't re-insert into the
-      // substitution table.
-      return Sub;
+      break;
     }
     DEMANGLE_FALLTHROUGH;
   }


        


More information about the llvm-commits mailing list