[clang] 9220150 - clang-format: [JS] handle pseudo-keywords.

Martin Probst via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 6 08:13:14 PDT 2020


Author: Martin Probst
Date: 2020-04-06T17:13:04+02:00
New Revision: 92201505cdecd8ba5795f5e33821c49dc57f0816

URL: https://github.com/llvm/llvm-project/commit/92201505cdecd8ba5795f5e33821c49dc57f0816
DIFF: https://github.com/llvm/llvm-project/commit/92201505cdecd8ba5795f5e33821c49dc57f0816.diff

LOG: clang-format: [JS] handle pseudo-keywords.

Summary:
The previous change in https://reviews.llvm.org/D77311 attempted to
detect more C++ keywords. However it also precisely detected all
JavaScript keywords. That's generally correct, but many JavaScripy
keywords, e.g. `get`, are so-called pseudo-keywords. They can be used in
positions where a keyword would never be legal, e.g. in a dotted
expression:

    x.type;  // type is a pseudo-keyword, but can be used here.
    x.get;   // same for get etc.

This change introduces an additional parameter to
`IsJavaScriptIdentifier`, allowing clients to toggle whether they want
to allow `IdentifierName` tokens, i.e. pseudo-keywords.

Reviewers: krasimir

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D77548

Added: 
    

Modified: 
    clang/lib/Format/FormatToken.h
    clang/lib/Format/TokenAnnotator.cpp
    clang/unittests/Format/FormatTestJS.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 48ec7602c21c..c67cf192ab1f 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -909,7 +909,11 @@ struct AdditionalKeywords {
 
   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
   /// \c false if it is a keyword or a pseudo keyword.
-  bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
+  /// If \c AcceptIdentifierName is true, returns true not only for keywords,
+  // but also for IdentifierName tokens (aka pseudo-keywords), such as
+  // ``yield``.
+  bool IsJavaScriptIdentifier(const FormatToken &Tok,
+                              bool AcceptIdentifierName = true) const {
     // Based on the list of JavaScript & TypeScript keywords here:
     // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74
     switch (Tok.Tok.getKind()) {
@@ -946,11 +950,14 @@ struct AdditionalKeywords {
     case tok::kw_while:
       // These are JS keywords that are lexed by LLVM/clang as keywords.
       return false;
-    case tok::identifier:
+    case tok::identifier: {
       // For identifiers, make sure they are true identifiers, excluding the
       // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords).
-      return JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
-             JsExtraKeywords.end();
+      bool IsPseudoKeyword =
+          JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) !=
+          JsExtraKeywords.end();
+      return AcceptIdentifierName || !IsPseudoKeyword;
+    }
     default:
       // Other keywords are handled in the switch below, to avoid problems due
       // to duplicate case labels when using the #include trick.

diff  --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 029741c3dce7..8204623645a4 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1522,9 +1522,11 @@ class AnnotatingParser {
     if (Style.Language == FormatStyle::LK_JavaScript) {
       if (Current.is(tok::exclaim)) {
         if (Current.Previous &&
-            (Keywords.IsJavaScriptIdentifier(*Current.Previous) ||
-             Current.Previous->isOneOf(tok::kw_namespace, tok::r_paren,
-                                       tok::r_square, tok::r_brace) ||
+            (Keywords.IsJavaScriptIdentifier(
+                 *Current.Previous, /* AcceptIdentifierName= */ true) ||
+             Current.Previous->isOneOf(
+                 tok::kw_namespace, tok::r_paren, tok::r_square, tok::r_brace,
+                 Keywords.kw_type, Keywords.kw_get, Keywords.kw_set) ||
              Current.Previous->Tok.isLiteral())) {
           Current.Type = TT_JsNonNullAssertion;
           return;
@@ -3071,7 +3073,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
       return false;
     // In tagged template literals ("html`bar baz`"), there is no space between
     // the tag identifier and the template string.
-    if (Keywords.IsJavaScriptIdentifier(Left) && Right.is(TT_TemplateString))
+    if (Keywords.IsJavaScriptIdentifier(Left,
+                                        /* AcceptIdentifierName= */ false) &&
+        Right.is(TT_TemplateString))
       return false;
     if (Right.is(tok::star) &&
         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))

diff  --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp
index 3fd795c526b1..eadea35f051a 100644
--- a/clang/unittests/Format/FormatTestJS.cpp
+++ b/clang/unittests/Format/FormatTestJS.cpp
@@ -2412,6 +2412,11 @@ TEST_F(FormatTestJS, CppKeywords) {
   verifyFormat("using!;");
   verifyFormat("virtual!;");
   verifyFormat("wchar_t!;");
+
+  // Positive tests:
+  verifyFormat("x.type!;");
+  verifyFormat("x.get!;");
+  verifyFormat("x.set!;");
 }
 
 TEST_F(FormatTestJS, NullPropagatingOperator) {


        


More information about the cfe-commits mailing list