[clang] 146d685 - clang-format: [JS] detect C++ keywords.

Martin Probst via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 3 05:24:07 PDT 2020


Author: Martin Probst
Date: 2020-04-03T14:23:56+02:00
New Revision: 146d685cd657399a4698015f16cc5910cc828728

URL: https://github.com/llvm/llvm-project/commit/146d685cd657399a4698015f16cc5910cc828728
DIFF: https://github.com/llvm/llvm-project/commit/146d685cd657399a4698015f16cc5910cc828728.diff

LOG: clang-format: [JS] detect C++ keywords.

Summary:
C++ defines a number of keywords that are regular identifiers in
JavaScript, e.g. `concept`:

    const concept = 1; // legit JS

This change expands the existing `IsJavaScriptIdentifier(Tok)` function
to return true for C++ keywords that aren't keywords in JS.

Reviewers: krasimir

Subscribers: jfb, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D77311

Added: 
    

Modified: 
    clang/lib/Format/FormatToken.h
    clang/lib/Format/TokenAnnotator.cpp
    clang/unittests/Format/FormatTestJS.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 10a5f0e96f96..48ec7602c21c 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -910,9 +910,64 @@ struct AdditionalKeywords {
   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
   /// \c false if it is a keyword or a pseudo keyword.
   bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
-    return Tok.is(tok::identifier) &&
-           JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
-               JsExtraKeywords.end();
+    // Based on the list of JavaScript & TypeScript keywords here:
+    // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74
+    switch (Tok.Tok.getKind()) {
+    case tok::kw_break:
+    case tok::kw_case:
+    case tok::kw_catch:
+    case tok::kw_class:
+    case tok::kw_continue:
+    case tok::kw_const:
+    case tok::kw_default:
+    case tok::kw_delete:
+    case tok::kw_do:
+    case tok::kw_else:
+    case tok::kw_enum:
+    case tok::kw_export:
+    case tok::kw_false:
+    case tok::kw_for:
+    case tok::kw_if:
+    case tok::kw_import:
+    case tok::kw_module:
+    case tok::kw_new:
+    case tok::kw_private:
+    case tok::kw_protected:
+    case tok::kw_public:
+    case tok::kw_return:
+    case tok::kw_static:
+    case tok::kw_switch:
+    case tok::kw_this:
+    case tok::kw_throw:
+    case tok::kw_true:
+    case tok::kw_try:
+    case tok::kw_typeof:
+    case tok::kw_void:
+    case tok::kw_while:
+      // These are JS keywords that are lexed by LLVM/clang as keywords.
+      return false;
+    case tok::identifier:
+      // For identifiers, make sure they are true identifiers, excluding the
+      // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords).
+      return JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
+             JsExtraKeywords.end();
+    default:
+      // Other keywords are handled in the switch below, to avoid problems due
+      // to duplicate case labels when using the #include trick.
+      break;
+    }
+
+    switch (Tok.Tok.getKind()) {
+      // Handle C++ keywords not included above: these are all JS identifiers.
+#define KEYWORD(X, Y) case tok::kw_##X:
+#include "clang/Basic/TokenKinds.def"
+      // #undef KEYWORD is not needed -- it's #undef-ed at the end of
+      // TokenKinds.def
+      return true;
+    default:
+      // All other tokens (punctuation etc) are not JS identifiers.
+      return false;
+    }
   }
 
   /// Returns \c true if \p Tok is a C# keyword, returns

diff  --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index a3cd4f42f8f8..029741c3dce7 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1522,9 +1522,9 @@ class AnnotatingParser {
     if (Style.Language == FormatStyle::LK_JavaScript) {
       if (Current.is(tok::exclaim)) {
         if (Current.Previous &&
-            (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
-                                       tok::r_paren, tok::r_square,
-                                       tok::r_brace) ||
+            (Keywords.IsJavaScriptIdentifier(*Current.Previous) ||
+             Current.Previous->isOneOf(tok::kw_namespace, tok::r_paren,
+                                       tok::r_square, tok::r_brace) ||
              Current.Previous->Tok.isLiteral())) {
           Current.Type = TT_JsNonNullAssertion;
           return;
@@ -3070,10 +3070,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
         (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
       return false;
     // In tagged template literals ("html`bar baz`"), there is no space between
-    // the tag identifier and the template string. getIdentifierInfo makes sure
-    // that the identifier is not a pseudo keyword like `yield`, either.
-    if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) &&
-        Right.is(TT_TemplateString))
+    // the tag identifier and the template string.
+    if (Keywords.IsJavaScriptIdentifier(Left) && Right.is(TT_TemplateString))
       return false;
     if (Right.is(tok::star) &&
         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))

diff  --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp
index 6efb8662f0f5..3fd795c526b1 100644
--- a/clang/unittests/Format/FormatTestJS.cpp
+++ b/clang/unittests/Format/FormatTestJS.cpp
@@ -386,13 +386,6 @@ TEST_F(FormatTestJS, ReservedWordsParenthesized) {
                "return (x);\n");
 }
 
-TEST_F(FormatTestJS, CppKeywords) {
-  // Make sure we don't mess stuff up because of C++ keywords.
-  verifyFormat("return operator && (aa);");
-  // .. or QT ones.
-  verifyFormat("slots: Slot[];");
-}
-
 TEST_F(FormatTestJS, ES6DestructuringAssignment) {
   verifyFormat("var [a, b, c] = [1, 2, 3];");
   verifyFormat("const [a, b, c] = [1, 2, 3];");
@@ -2366,6 +2359,61 @@ TEST_F(FormatTestJS, NonNullAssertionOperator) {
   verifyFormat("return !!x;\n");
 }
 
+TEST_F(FormatTestJS, CppKeywords) {
+  // Make sure we don't mess stuff up because of C++ keywords.
+  verifyFormat("return operator && (aa);");
+  // .. or QT ones.
+  verifyFormat("const slots: Slot[];");
+  // use the "!" assertion operator to validate that clang-format understands
+  // these C++ keywords aren't keywords in JS/TS.
+  verifyFormat("auto!;");
+  verifyFormat("char!;");
+  verifyFormat("concept!;");
+  verifyFormat("double!;");
+  verifyFormat("extern!;");
+  verifyFormat("float!;");
+  verifyFormat("inline!;");
+  verifyFormat("int!;");
+  verifyFormat("long!;");
+  verifyFormat("register!;");
+  verifyFormat("restrict!;");
+  verifyFormat("sizeof!;");
+  verifyFormat("struct!;");
+  verifyFormat("typedef!;");
+  verifyFormat("union!;");
+  verifyFormat("unsigned!;");
+  verifyFormat("volatile!;");
+  verifyFormat("_Alignas!;");
+  verifyFormat("_Alignof!;");
+  verifyFormat("_Atomic!;");
+  verifyFormat("_Bool!;");
+  verifyFormat("_Complex!;");
+  verifyFormat("_Generic!;");
+  verifyFormat("_Imaginary!;");
+  verifyFormat("_Noreturn!;");
+  verifyFormat("_Static_assert!;");
+  verifyFormat("_Thread_local!;");
+  verifyFormat("__func__!;");
+  verifyFormat("__objc_yes!;");
+  verifyFormat("__objc_no!;");
+  verifyFormat("asm!;");
+  verifyFormat("bool!;");
+  verifyFormat("const_cast!;");
+  verifyFormat("dynamic_cast!;");
+  verifyFormat("explicit!;");
+  verifyFormat("friend!;");
+  verifyFormat("mutable!;");
+  verifyFormat("operator!;");
+  verifyFormat("reinterpret_cast!;");
+  verifyFormat("static_cast!;");
+  verifyFormat("template!;");
+  verifyFormat("typename!;");
+  verifyFormat("typeid!;");
+  verifyFormat("using!;");
+  verifyFormat("virtual!;");
+  verifyFormat("wchar_t!;");
+}
+
 TEST_F(FormatTestJS, NullPropagatingOperator) {
   verifyFormat("let x = foo?.bar?.baz();\n");
   verifyFormat("let x = foo?.(foo);\n");


        


More information about the cfe-commits mailing list