[clang-tools-extra] b2b993a - [pseudo] Eliminate multiple-specified-types ambiguities using guards

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 25 03:57:45 PDT 2022


Author: Sam McCall
Date: 2022-07-25T12:57:07+02:00
New Revision: b2b993a6ae675955b1112473c035ae6b4e3932a0

URL: https://github.com/llvm/llvm-project/commit/b2b993a6ae675955b1112473c035ae6b4e3932a0
DIFF: https://github.com/llvm/llvm-project/commit/b2b993a6ae675955b1112473c035ae6b4e3932a0.diff

LOG: [pseudo] Eliminate multiple-specified-types ambiguities using guards

Motivating case: `foo bar;` is not a declaration of nothing with `foo` and `bar`
both types.

This is a common and critical ambiguity, clangd/AST.cpp has 20% fewer
ambiguous nodes (1674->1332) after this change.

Differential Revision: https://reviews.llvm.org/D130337

Added: 
    clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp

Modified: 
    clang-tools-extra/pseudo/lib/cxx/CXX.cpp
    clang-tools-extra/pseudo/lib/cxx/cxx.bnf
    clang-tools-extra/pseudo/test/fuzzer.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
index 7fc3a48d63189..73af0a7a2d52a 100644
--- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -14,7 +14,9 @@
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Debug.h"
 #include <utility>
+#define DEBUG_TYPE "CXX.cpp"
 
 namespace clang {
 namespace pseudo {
@@ -160,7 +162,106 @@ bool guardNextTokenNotElse(const GuardParams &P) {
   return symbolToToken(P.Lookahead) != tok::kw_else;
 }
 
+// Whether this e.g. decl-specifier contains an "exclusive" type such as a class
+// name, and thus can't combine with a second exclusive type.
+//
+// Returns false for
+//  - non-types
+//  - "unsigned" etc that may suffice as types but may modify others
+//  - cases of uncertainty (e.g. due to ambiguity)
+bool hasExclusiveType(const ForestNode *N) {
+  // FIXME: every time we apply this check, we walk the whole subtree.
+  // Add per-node caching instead.
+  while (true) {
+    assert(N->symbol() == (SymbolID)Symbol::decl_specifier_seq ||
+           N->symbol() == (SymbolID)Symbol::type_specifier_seq ||
+           N->symbol() == (SymbolID)Symbol::defining_type_specifier_seq ||
+           N->symbol() == (SymbolID)Symbol::decl_specifier ||
+           N->symbol() == (SymbolID)Symbol::type_specifier ||
+           N->symbol() == (SymbolID)Symbol::defining_type_specifier ||
+           N->symbol() == (SymbolID)Symbol::simple_type_specifier);
+    if (N->kind() == ForestNode::Opaque)
+      return false; // conservative
+    if (N->kind() == ForestNode::Ambiguous)
+      return llvm::all_of(N->alternatives(), hasExclusiveType); // conservative
+    // All supported symbols are nonterminals.
+    assert(N->kind() == ForestNode::Sequence);
+    switch (N->rule()) {
+      // seq := element seq: check element then continue into seq
+      case (RuleID)Rule::decl_specifier_seq_0decl_specifier_1decl_specifier_seq:
+      case (RuleID)Rule::defining_type_specifier_seq_0defining_type_specifier_1defining_type_specifier_seq:
+      case (RuleID)Rule::type_specifier_seq_0type_specifier_1type_specifier_seq:
+        if (hasExclusiveType(N->children()[0]))
+          return true;
+        N = N->children()[1];
+        continue;
+      // seq := element: continue into element
+      case (RuleID)Rule::decl_specifier_seq_0decl_specifier:
+      case (RuleID)Rule::type_specifier_seq_0type_specifier:
+      case (RuleID)Rule::defining_type_specifier_seq_0defining_type_specifier:
+        N = N->children()[0];
+        continue;
+
+      // defining-type-specifier
+      case (RuleID)Rule::defining_type_specifier_0type_specifier:
+        N = N->children()[0];
+        continue;
+      case (RuleID)Rule::defining_type_specifier_0class_specifier:
+      case (RuleID)Rule::defining_type_specifier_0enum_specifier:
+        return true;
+
+      // decl-specifier
+      case (RuleID)Rule::decl_specifier_0defining_type_specifier:
+        N = N->children()[0];
+        continue;
+      case (RuleID)Rule::decl_specifier_0consteval:
+      case (RuleID)Rule::decl_specifier_0constexpr:
+      case (RuleID)Rule::decl_specifier_0constinit:
+      case (RuleID)Rule::decl_specifier_0inline:
+      case (RuleID)Rule::decl_specifier_0friend:
+      case (RuleID)Rule::decl_specifier_0storage_class_specifier:
+      case (RuleID)Rule::decl_specifier_0typedef:
+      case (RuleID)Rule::decl_specifier_0function_specifier:
+        return false;
+
+      // type-specifier
+      case (RuleID)Rule::type_specifier_0elaborated_type_specifier:
+      case (RuleID)Rule::type_specifier_0typename_specifier:
+        return true;
+      case (RuleID)Rule::type_specifier_0simple_type_specifier:
+        N = N->children()[0];
+        continue;
+      case (RuleID)Rule::type_specifier_0cv_qualifier:
+        return false;
+
+      // simple-type-specifier
+      case (RuleID)Rule::simple_type_specifier_0type_name:
+      case (RuleID)Rule::simple_type_specifier_0template_name:
+      case (RuleID)Rule::simple_type_specifier_0builtin_type:
+      case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1template_2simple_template_id:
+      case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1template_name:
+      case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1type_name:
+      case (RuleID)Rule::simple_type_specifier_0decltype_specifier:
+      case (RuleID)Rule::simple_type_specifier_0placeholder_type_specifier:
+        return true;
+      case (RuleID)Rule::simple_type_specifier_0long:
+      case (RuleID)Rule::simple_type_specifier_0short:
+      case (RuleID)Rule::simple_type_specifier_0signed:
+      case (RuleID)Rule::simple_type_specifier_0unsigned:
+        return false;
+
+      default:
+        LLVM_DEBUG(llvm::errs() << "Unhandled rule " << N->rule() << "\n");
+        llvm_unreachable("hasExclusiveType be exhaustive!");
+    }
+  }
+}
+
 llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
+#define GUARD(cond)                                                            \
+  {                                                                            \
+    [](const GuardParams &P) { return cond; }                                  \
+  }
 #define TOKEN_GUARD(kind, cond)                                                \
   [](const GuardParams& P) {                                                   \
     const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens);                  \
@@ -177,6 +278,16 @@ llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
       {(RuleID)Rule::non_function_declarator_0declarator,
        SYMBOL_GUARD(declarator, !isFunctionDeclarator(&N))},
 
+      // A {decl,type,defining-type}-specifier-sequence cannot have multiple
+      // "exclusive" types (like class names): a value has only one type.
+      {(RuleID)Rule::
+           defining_type_specifier_seq_0defining_type_specifier_1defining_type_specifier_seq,
+       GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))},
+      {(RuleID)Rule::type_specifier_seq_0type_specifier_1type_specifier_seq,
+       GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))},
+      {(RuleID)Rule::decl_specifier_seq_0decl_specifier_1decl_specifier_seq,
+       GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))},
+
       {(RuleID)Rule::contextual_override_0identifier,
        TOKEN_GUARD(identifier, Tok.text() == "override")},
       {(RuleID)Rule::contextual_final_0identifier,
@@ -190,10 +301,12 @@ llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
       {(RuleID)Rule::contextual_zero_0numeric_constant,
        TOKEN_GUARD(numeric_constant, Tok.text() == "0")},
 
-      {(RuleID)Rule::selection_statement_0if_1l_paren_2condition_3r_paren_4statement,
-        guardNextTokenNotElse},
-      {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement,
-        guardNextTokenNotElse},
+      {(RuleID)Rule::
+           selection_statement_0if_1l_paren_2condition_3r_paren_4statement,
+       guardNextTokenNotElse},
+      {(RuleID)Rule::
+           selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement,
+       guardNextTokenNotElse},
 
       // The grammar distinguishes (only) user-defined vs plain string literals,
       // where the clang lexer distinguishes (only) encoding types.

diff  --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
index ca8fc01b8f414..8dfab8b9788d9 100644
--- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -350,7 +350,7 @@ decl-specifier := CONSTEVAL
 decl-specifier := CONSTINIT
 decl-specifier := INLINE
 decl-specifier-seq := decl-specifier
-decl-specifier-seq := decl-specifier decl-specifier-seq
+decl-specifier-seq := decl-specifier decl-specifier-seq [guard]
 storage-class-specifier := STATIC
 storage-class-specifier := THREAD_LOCAL
 storage-class-specifier := EXTERN
@@ -364,31 +364,32 @@ type-specifier := elaborated-type-specifier
 type-specifier := typename-specifier
 type-specifier := cv-qualifier
 type-specifier-seq := type-specifier
-type-specifier-seq := type-specifier type-specifier-seq
+type-specifier-seq := type-specifier type-specifier-seq [guard]
 defining-type-specifier := type-specifier
 defining-type-specifier := class-specifier
 defining-type-specifier := enum-specifier
 defining-type-specifier-seq := defining-type-specifier
-defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq
+defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq [guard]
 simple-type-specifier := nested-name-specifier_opt type-name
 simple-type-specifier := nested-name-specifier TEMPLATE simple-template-id
 simple-type-specifier := decltype-specifier
 simple-type-specifier := placeholder-type-specifier
 simple-type-specifier := nested-name-specifier_opt template-name
-simple-type-specifier := CHAR
-simple-type-specifier := CHAR8_T
-simple-type-specifier := CHAR16_T
-simple-type-specifier := CHAR32_T
-simple-type-specifier := WCHAR_T
-simple-type-specifier := BOOL
+simple-type-specifier := builtin-type
+builtin-type := CHAR
+builtin-type := CHAR8_T
+builtin-type := CHAR16_T
+builtin-type := CHAR32_T
+builtin-type := WCHAR_T
+builtin-type := BOOL
 simple-type-specifier := SHORT
-simple-type-specifier := INT
+builtin-type := INT
 simple-type-specifier := LONG
 simple-type-specifier := SIGNED
 simple-type-specifier := UNSIGNED
-simple-type-specifier := FLOAT
-simple-type-specifier := DOUBLE
-simple-type-specifier := VOID
+builtin-type := FLOAT
+builtin-type := DOUBLE
+builtin-type := VOID
 type-name := class-name
 type-name := enum-name
 type-name := typedef-name

diff  --git a/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp b/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp
new file mode 100644
index 0000000000000..255e8bedac497
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp
@@ -0,0 +1,27 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
+
+// not parsed as Type{foo} Type{bar}
+foo bar;
+// CHECK-NOT: simple-declaration := decl-specifier-seq ;
+// CHECK:    simple-declaration := decl-specifier-seq init-declarator-list ;
+// CHECK:    ├─decl-specifier-seq~simple-type-specifier
+// CHECK:    ├─init-declarator-list~IDENTIFIER
+// CHECK:    └─;
+// CHECK-NOT: simple-declaration := decl-specifier-seq ;
+
+// not parsed as Type{std} Type{::string} Declarator{s};
+std::string s;
+// CHECK-NOT: nested-name-specifier := ::
+// CHECK:     simple-declaration := decl-specifier-seq init-declarator-list ;
+// CHECK:     ├─decl-specifier-seq~simple-type-specifier := <ambiguous>
+// CHECK:     │ ├─simple-type-specifier := nested-name-specifier type-name
+// CHECK:     │ │ ├─nested-name-specifier := <ambiguous> #1
+// CHECK:     │ │ │ ├─nested-name-specifier := type-name ::
+// CHECK:     │ │ │ └─nested-name-specifier := namespace-name ::
+// CHECK:     │ │ └─type-name
+// CHECK:     │ └─simple-type-specifier := nested-name-specifier template-name
+// CHECK:     │   ├─nested-name-specifier =#1
+// CHECK:     │   └─template-name~IDENTIFIER
+// CHECK:     ├─init-declarator-list~IDENTIFIER
+// CHECK:     └─;
+// CHECK-NOT: nested-name-specifier := ::

diff  --git a/clang-tools-extra/pseudo/test/fuzzer.cpp b/clang-tools-extra/pseudo/test/fuzzer.cpp
index 6f4d093ed2ad3..400746a9d12d5 100644
--- a/clang-tools-extra/pseudo/test/fuzzer.cpp
+++ b/clang-tools-extra/pseudo/test/fuzzer.cpp
@@ -1,4 +1,4 @@
 // RUN: clang-pseudo-fuzzer -grammar=%cxx-bnf-file -print %s | FileCheck %s
 int x;
 // CHECK: translation-unit := declaration-seq
-// CHECK: simple-type-specifier := INT
+// CHECK: builtin-type := INT


        


More information about the cfe-commits mailing list