[clang] 146d685 - clang-format: [JS] detect C++ keywords.
Martin Probst via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 3 05:24:07 PDT 2020
Author: Martin Probst
Date: 2020-04-03T14:23:56+02:00
New Revision: 146d685cd657399a4698015f16cc5910cc828728
URL: https://github.com/llvm/llvm-project/commit/146d685cd657399a4698015f16cc5910cc828728
DIFF: https://github.com/llvm/llvm-project/commit/146d685cd657399a4698015f16cc5910cc828728.diff
LOG: clang-format: [JS] detect C++ keywords.
Summary:
C++ defines a number of keywords that are regular identifiers in
JavaScript, e.g. `concept`:
const concept = 1; // legit JS
This change expands the existing `IsJavaScriptIdentifier(Tok)` function
to return true for C++ keywords that aren't keywords in JS.
Reviewers: krasimir
Subscribers: jfb, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D77311
Added:
Modified:
clang/lib/Format/FormatToken.h
clang/lib/Format/TokenAnnotator.cpp
clang/unittests/Format/FormatTestJS.cpp
Removed:
################################################################################
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 10a5f0e96f96..48ec7602c21c 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -910,9 +910,64 @@ struct AdditionalKeywords {
/// Returns \c true if \p Tok is a true JavaScript identifier, returns
/// \c false if it is a keyword or a pseudo keyword.
bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
- return Tok.is(tok::identifier) &&
- JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
- JsExtraKeywords.end();
+ // Based on the list of JavaScript & TypeScript keywords here:
+ // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74
+ switch (Tok.Tok.getKind()) {
+ case tok::kw_break:
+ case tok::kw_case:
+ case tok::kw_catch:
+ case tok::kw_class:
+ case tok::kw_continue:
+ case tok::kw_const:
+ case tok::kw_default:
+ case tok::kw_delete:
+ case tok::kw_do:
+ case tok::kw_else:
+ case tok::kw_enum:
+ case tok::kw_export:
+ case tok::kw_false:
+ case tok::kw_for:
+ case tok::kw_if:
+ case tok::kw_import:
+ case tok::kw_module:
+ case tok::kw_new:
+ case tok::kw_private:
+ case tok::kw_protected:
+ case tok::kw_public:
+ case tok::kw_return:
+ case tok::kw_static:
+ case tok::kw_switch:
+ case tok::kw_this:
+ case tok::kw_throw:
+ case tok::kw_true:
+ case tok::kw_try:
+ case tok::kw_typeof:
+ case tok::kw_void:
+ case tok::kw_while:
+ // These are JS keywords that are lexed by LLVM/clang as keywords.
+ return false;
+ case tok::identifier:
+ // For identifiers, make sure they are true identifiers, excluding the
+ // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords).
+ return JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
+ JsExtraKeywords.end();
+ default:
+ // Other keywords are handled in the switch below, to avoid problems due
+ // to duplicate case labels when using the #include trick.
+ break;
+ }
+
+ switch (Tok.Tok.getKind()) {
+ // Handle C++ keywords not included above: these are all JS identifiers.
+#define KEYWORD(X, Y) case tok::kw_##X:
+#include "clang/Basic/TokenKinds.def"
+ // #undef KEYWORD is not needed -- it's #undef-ed at the end of
+ // TokenKinds.def
+ return true;
+ default:
+ // All other tokens (punctuation etc) are not JS identifiers.
+ return false;
+ }
}
/// Returns \c true if \p Tok is a C# keyword, returns
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index a3cd4f42f8f8..029741c3dce7 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1522,9 +1522,9 @@ class AnnotatingParser {
if (Style.Language == FormatStyle::LK_JavaScript) {
if (Current.is(tok::exclaim)) {
if (Current.Previous &&
- (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
- tok::r_paren, tok::r_square,
- tok::r_brace) ||
+ (Keywords.IsJavaScriptIdentifier(*Current.Previous) ||
+ Current.Previous->isOneOf(tok::kw_namespace, tok::r_paren,
+ tok::r_square, tok::r_brace) ||
Current.Previous->Tok.isLiteral())) {
Current.Type = TT_JsNonNullAssertion;
return;
@@ -3070,10 +3070,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
return false;
// In tagged template literals ("html`bar baz`"), there is no space between
- // the tag identifier and the template string. getIdentifierInfo makes sure
- // that the identifier is not a pseudo keyword like `yield`, either.
- if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) &&
- Right.is(TT_TemplateString))
+ // the tag identifier and the template string.
+ if (Keywords.IsJavaScriptIdentifier(Left) && Right.is(TT_TemplateString))
return false;
if (Right.is(tok::star) &&
Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp
index 6efb8662f0f5..3fd795c526b1 100644
--- a/clang/unittests/Format/FormatTestJS.cpp
+++ b/clang/unittests/Format/FormatTestJS.cpp
@@ -386,13 +386,6 @@ TEST_F(FormatTestJS, ReservedWordsParenthesized) {
"return (x);\n");
}
-TEST_F(FormatTestJS, CppKeywords) {
- // Make sure we don't mess stuff up because of C++ keywords.
- verifyFormat("return operator && (aa);");
- // .. or QT ones.
- verifyFormat("slots: Slot[];");
-}
-
TEST_F(FormatTestJS, ES6DestructuringAssignment) {
verifyFormat("var [a, b, c] = [1, 2, 3];");
verifyFormat("const [a, b, c] = [1, 2, 3];");
@@ -2366,6 +2359,61 @@ TEST_F(FormatTestJS, NonNullAssertionOperator) {
verifyFormat("return !!x;\n");
}
+TEST_F(FormatTestJS, CppKeywords) {
+ // Make sure we don't mess stuff up because of C++ keywords.
+ verifyFormat("return operator && (aa);");
+ // .. or QT ones.
+ verifyFormat("const slots: Slot[];");
+ // use the "!" assertion operator to validate that clang-format understands
+ // these C++ keywords aren't keywords in JS/TS.
+ verifyFormat("auto!;");
+ verifyFormat("char!;");
+ verifyFormat("concept!;");
+ verifyFormat("double!;");
+ verifyFormat("extern!;");
+ verifyFormat("float!;");
+ verifyFormat("inline!;");
+ verifyFormat("int!;");
+ verifyFormat("long!;");
+ verifyFormat("register!;");
+ verifyFormat("restrict!;");
+ verifyFormat("sizeof!;");
+ verifyFormat("struct!;");
+ verifyFormat("typedef!;");
+ verifyFormat("union!;");
+ verifyFormat("unsigned!;");
+ verifyFormat("volatile!;");
+ verifyFormat("_Alignas!;");
+ verifyFormat("_Alignof!;");
+ verifyFormat("_Atomic!;");
+ verifyFormat("_Bool!;");
+ verifyFormat("_Complex!;");
+ verifyFormat("_Generic!;");
+ verifyFormat("_Imaginary!;");
+ verifyFormat("_Noreturn!;");
+ verifyFormat("_Static_assert!;");
+ verifyFormat("_Thread_local!;");
+ verifyFormat("__func__!;");
+ verifyFormat("__objc_yes!;");
+ verifyFormat("__objc_no!;");
+ verifyFormat("asm!;");
+ verifyFormat("bool!;");
+ verifyFormat("const_cast!;");
+ verifyFormat("dynamic_cast!;");
+ verifyFormat("explicit!;");
+ verifyFormat("friend!;");
+ verifyFormat("mutable!;");
+ verifyFormat("operator!;");
+ verifyFormat("reinterpret_cast!;");
+ verifyFormat("static_cast!;");
+ verifyFormat("template!;");
+ verifyFormat("typename!;");
+ verifyFormat("typeid!;");
+ verifyFormat("using!;");
+ verifyFormat("virtual!;");
+ verifyFormat("wchar_t!;");
+}
+
TEST_F(FormatTestJS, NullPropagatingOperator) {
verifyFormat("let x = foo?.bar?.baz();\n");
verifyFormat("let x = foo?.(foo);\n");
More information about the cfe-commits
mailing list