r263470 - clang-format: [JS] Handle certain cases of ASI.

Daniel Jasper via cfe-commits cfe-commits at lists.llvm.org
Mon Mar 14 12:21:36 PDT 2016


Author: djasper
Date: Mon Mar 14 14:21:36 2016
New Revision: 263470

URL: http://llvm.org/viewvc/llvm-project?rev=263470&view=rev
Log:
clang-format: [JS] Handle certain cases of ASI.

Automatic Semicolon Insertion can only be properly handled by parsing
source code. However conservatively catching just a few, common
situations prevents breaking code during development, which greatly
improves usability.

JS code should still use semicolons, and ASI code should be flagged by
a compiler or linter.

Patch by Martin Probst. Thank you.

Modified:
    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.h
    cfe/trunk/unittests/Format/FormatTestJS.cpp

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=263470&r1=263469&r2=263470&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Mon Mar 14 14:21:36 2016
@@ -660,6 +660,72 @@ static bool tokenCanStartNewLine(const c
          Tok.isNot(tok::kw_noexcept);
 }
 
+static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
+                                 const FormatToken *FormatTok) {
+  if (FormatTok->Tok.isLiteral())
+    return true;
+  // FIXME: This returns true for C/C++ keywords like 'struct'.
+  return FormatTok->is(tok::identifier) &&
+         (FormatTok->Tok.getIdentifierInfo() == nullptr ||
+          !FormatTok->isOneOf(Keywords.kw_in, Keywords.kw_of,
+                              Keywords.kw_finally, Keywords.kw_function,
+                              Keywords.kw_import, Keywords.kw_is,
+                              Keywords.kw_let, Keywords.kw_var,
+                              Keywords.kw_abstract, Keywords.kw_extends,
+                              Keywords.kw_implements, Keywords.kw_instanceof,
+                              Keywords.kw_interface, Keywords.kw_throws));
+}
+
+// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
+// when encountered after a value (see mustBeJSIdentOrValue).
+static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
+                           const FormatToken *FormatTok) {
+  return FormatTok->isOneOf(
+      tok::kw_return,
+      // conditionals
+      tok::kw_if, tok::kw_else,
+      // loops
+      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
+      // switch/case
+      tok::kw_switch, tok::kw_case,
+      // exceptions
+      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
+      // declaration
+      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
+      Keywords.kw_function);
+}
+
+// readTokenWithJavaScriptASI reads the next token and terminates the current
+// line if JavaScript Automatic Semicolon Insertion must
+// happen between the current token and the next token.
+//
+// This method is conservative - it cannot cover all edge cases of JavaScript,
+// but only aims to correctly handle certain well known cases. It *must not*
+// return true in speculative cases.
+void UnwrappedLineParser::readTokenWithJavaScriptASI() {
+  FormatToken *Previous = FormatTok;
+  readToken();
+  FormatToken *Next = FormatTok;
+
+  bool IsOnSameLine =
+      CommentsBeforeNextToken.empty()
+          ? Next->NewlinesBefore == 0
+          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
+  if (IsOnSameLine)
+    return;
+
+  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
+  if (Next->is(tok::exclaim) && PreviousMustBeValue)
+    addUnwrappedLine();
+  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
+  if (NextMustBeValue && (PreviousMustBeValue ||
+                          Previous->isOneOf(tok::r_square, tok::r_paren,
+                                            tok::plusplus, tok::minusminus)))
+    addUnwrappedLine();
+  if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
+    addUnwrappedLine();
+}
+
 void UnwrappedLineParser::parseStructuralElement() {
   assert(!FormatTok->is(tok::l_brace));
   if (Style.Language == FormatStyle::LK_TableGen &&
@@ -936,6 +1002,7 @@ void UnwrappedLineParser::parseStructura
         return;
       }
 
+      // See if the following token should start a new unwrapped line.
       StringRef Text = FormatTok->TokenText;
       nextToken();
       if (Line->Tokens.size() == 1 &&
@@ -1898,7 +1965,10 @@ void UnwrappedLineParser::nextToken() {
     return;
   flushComments(isOnNewLine(*FormatTok));
   pushToken(FormatTok);
-  readToken();
+  if (Style.Language != FormatStyle::LK_JavaScript)
+    readToken();
+  else
+    readTokenWithJavaScriptASI();
 }
 
 const FormatToken *UnwrappedLineParser::getPreviousToken() {

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=263470&r1=263469&r2=263470&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.h Mon Mar 14 14:21:36 2016
@@ -81,6 +81,7 @@ private:
   void parsePPElse();
   void parsePPEndIf();
   void parsePPUnknown();
+  void readTokenWithJavaScriptASI();
   void parseStructuralElement();
   bool tryToParseBracedList();
   bool parseBracedList(bool ContinueOnSemicolons = false);

Modified: cfe/trunk/unittests/Format/FormatTestJS.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestJS.cpp?rev=263470&r1=263469&r2=263470&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTestJS.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTestJS.cpp Mon Mar 14 14:21:36 2016
@@ -49,8 +49,16 @@ protected:
   static void verifyFormat(
       llvm::StringRef Code,
       const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_JavaScript)) {
-    std::string result = format(test::messUp(Code), Style);
-    EXPECT_EQ(Code.str(), result) << "Formatted:\n" << result;
+    std::string Result = format(test::messUp(Code), Style);
+    EXPECT_EQ(Code.str(), Result) << "Formatted:\n" << Result;
+  }
+
+  static void verifyFormat(
+      llvm::StringRef Expected,
+      llvm::StringRef Code,
+      const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_JavaScript)) {
+    std::string Result = format(Code, Style);
+    EXPECT_EQ(Expected.str(), Result) << "Formatted:\n" << Result;
   }
 };
 
@@ -612,7 +620,7 @@ TEST_F(FormatTestJS, ForLoops) {
                "}");
 }
 
-TEST_F(FormatTestJS, AutomaticSemicolonInsertion) {
+TEST_F(FormatTestJS, WrapRespectsAutomaticSemicolonInsertion) {
   // The following statements must not wrap, as otherwise the program meaning
   // would change due to automatic semicolon insertion.
   // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.9.1.
@@ -628,6 +636,54 @@ TEST_F(FormatTestJS, AutomaticSemicolonI
                getGoogleJSStyleWithColumns(12));
 }
 
+TEST_F(FormatTestJS, AutomaticSemicolonInsertionHeuristic) {
+  verifyFormat("a\n"
+               "b;",
+               " a \n"
+               " b ;");
+  verifyFormat("a()\n"
+               "b;",
+               " a ()\n"
+               " b ;");
+  verifyFormat("a[b]\n"
+               "c;",
+               "a [b]\n"
+               "c ;");
+  verifyFormat("1\n"
+               "a;",
+               "1 \n"
+               "a ;");
+  verifyFormat("a\n"
+               "1;",
+               "a \n"
+               "1 ;");
+  verifyFormat("a\n"
+               "'x';",
+               "a \n"
+               " 'x';");
+  verifyFormat("a++\n"
+               "b;",
+               "a ++\n"
+               "b ;");
+  verifyFormat("a\n"
+               "!b && c;",
+               "a \n"
+               " ! b && c;");
+  verifyFormat("a\n"
+               "if (1) f();",
+               " a\n"
+               " if (1) f();");
+  verifyFormat("a\n"
+               "class X {}",
+               " a\n"
+               " class X {}");
+  verifyFormat("var a", "var\n"
+                        "a");
+  verifyFormat("x instanceof String", "x\n"
+                                      "instanceof\n"
+                                      "String");
+}
+
 TEST_F(FormatTestJS, ClosureStyleCasts) {
   verifyFormat("var x = /** @type {foo} */ (bar);");
 }
@@ -726,13 +782,13 @@ TEST_F(FormatTestJS, RegexLiteralSpecial
   verifyFormat("var regex = /\a\\//g;");
   verifyFormat("var regex = /a\\//;\n"
                "var x = 0;");
-  EXPECT_EQ("var regex = /'/g;", format("var regex = /'/g ;"));
-  EXPECT_EQ("var regex = /'/g;  //'", format("var regex = /'/g ; //'"));
-  EXPECT_EQ("var regex = /\\/*/;\n"
-            "var x = 0;",
-            format("var regex = /\\/*/;\n"
-                   "var x=0;"));
-  EXPECT_EQ("var x = /a\\//;", format("var x = /a\\//  \n;"));
+  verifyFormat("var regex = /'/g;", "var regex = /'/g ;");
+  verifyFormat("var regex = /'/g;  //'", "var regex = /'/g ; //'");
+  verifyFormat("var regex = /\\/*/;\n"
+               "var x = 0;",
+               "var regex = /\\/*/;\n"
+               "var x=0;");
+  verifyFormat("var x = /a\\//;", "var x = /a\\//  \n;");
   verifyFormat("var regex = /\"/;", getGoogleJSStyleWithColumns(16));
   verifyFormat("var regex =\n"
                "    /\"/;",
@@ -942,38 +998,37 @@ TEST_F(FormatTestJS, Modules) {
 
 TEST_F(FormatTestJS, TemplateStrings) {
   // Keeps any whitespace/indentation within the template string.
-  EXPECT_EQ("var x = `hello\n"
+  verifyFormat("var x = `hello\n"
             "     ${  name    }\n"
             "  !`;",
-            format("var x    =    `hello\n"
+            "var x    =    `hello\n"
                    "     ${  name    }\n"
-                   "  !`;"));
+                   "  !`;");
 
   verifyFormat("var x =\n"
                "    `hello ${world}` >= some();",
                getGoogleJSStyleWithColumns(34)); // Barely doesn't fit.
   verifyFormat("var x = `hello ${world}` >= some();",
                getGoogleJSStyleWithColumns(35)); // Barely fits.
-  EXPECT_EQ("var x = `hello\n"
+  verifyFormat("var x = `hello\n"
             "  ${world}` >=\n"
             "    some();",
-            format("var x =\n"
+            "var x =\n"
                    "    `hello\n"
                    "  ${world}` >= some();",
-                   getGoogleJSStyleWithColumns(21))); // Barely doesn't fit.
-  EXPECT_EQ("var x = `hello\n"
+                   getGoogleJSStyleWithColumns(21)); // Barely doesn't fit.
+  verifyFormat("var x = `hello\n"
             "  ${world}` >= some();",
-            format("var x =\n"
+            "var x =\n"
                    "    `hello\n"
                    "  ${world}` >= some();",
-                   getGoogleJSStyleWithColumns(22))); // Barely fits.
+                   getGoogleJSStyleWithColumns(22)); // Barely fits.
 
   verifyFormat("var x =\n"
                "    `h`;",
                getGoogleJSStyleWithColumns(11));
-  EXPECT_EQ(
-      "var x =\n    `multi\n  line`;",
-      format("var x = `multi\n  line`;", getGoogleJSStyleWithColumns(13)));
+  verifyFormat("var x =\n    `multi\n  line`;", "var x = `multi\n  line`;",
+               getGoogleJSStyleWithColumns(13));
   verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n"
                "    `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`);");
 
@@ -987,39 +1042,38 @@ TEST_F(FormatTestJS, TemplateStrings) {
   verifyFormat("var x = `hello` == `hello`;");
 
   // Comments in template strings.
-  EXPECT_EQ("var x = `//a`;\n"
-            "var y;",
-            format("var x =\n `//a`;\n"
-                   "var y  ;"));
-  EXPECT_EQ("var x = `/*a`;\n"
+  verifyFormat("var x = `//a`;\n"
             "var y;",
-            format("var x =\n `/*a`;\n"
-                   "var y;"));
+            "var x =\n `//a`;\n"
+                   "var y  ;");
+  verifyFormat("var x = `/*a`;\n"
+               "var y;",
+               "var x =\n `/*a`;\n"
+               "var y;");
   // Unterminated string literals in a template string.
   verifyFormat("var x = `'`;  // comment with matching quote '\n"
                "var y;");
   verifyFormat("var x = `\"`;  // comment with matching quote \"\n"
                "var y;");
-  EXPECT_EQ("it(`'aaaaaaaaaaaaaaa   `, aaaaaaaaa);",
-            format("it(`'aaaaaaaaaaaaaaa   `,   aaaaaaaaa) ;",
-                   getGoogleJSStyleWithColumns(40)));
+  verifyFormat("it(`'aaaaaaaaaaaaaaa   `, aaaaaaaaa);",
+               "it(`'aaaaaaaaaaaaaaa   `,   aaaaaaaaa) ;",
+               getGoogleJSStyleWithColumns(40));
   // Backticks in a comment - not a template string.
-  EXPECT_EQ("var x = 1  // `/*a`;\n"
-            "    ;",
-            format("var x =\n 1  // `/*a`;\n"
-                   "    ;"));
-  EXPECT_EQ("/* ` */ var x = 1; /* ` */",
-            format("/* ` */ var x\n= 1; /* ` */"));
+  verifyFormat("var x = 1  // `/*a`;\n"
+               "    ;",
+               "var x =\n 1  // `/*a`;\n"
+               "    ;");
+  verifyFormat("/* ` */ var x = 1; /* ` */", "/* ` */ var x\n= 1; /* ` */");
   // Comment spans multiple template strings.
-  EXPECT_EQ("var x = `/*a`;\n"
-            "var y = ` */ `;",
-            format("var x =\n `/*a`;\n"
-                   "var y =\n ` */ `;"));
+  verifyFormat("var x = `/*a`;\n"
+               "var y = ` */ `;",
+               "var x =\n `/*a`;\n"
+               "var y =\n ` */ `;");
   // Escaped backtick.
-  EXPECT_EQ("var x = ` \\` a`;\n"
-            "var y;",
-            format("var x = ` \\` a`;\n"
-                   "var y;"));
+  verifyFormat("var x = ` \\` a`;\n"
+               "var y;",
+               "var x = ` \\` a`;\n"
+               "var y;");
 }
 
 TEST_F(FormatTestJS, CastSyntax) { verifyFormat("var x = <type>foo;"); }
@@ -1089,37 +1143,38 @@ TEST_F(FormatTestJS, WrapAfterParen) {
 }
 
 TEST_F(FormatTestJS, JSDocAnnotations) {
-  EXPECT_EQ("/**\n"
-            " * @export {this.is.a.long.path.to.a.Type}\n"
-            " */",
-            format("/**\n"
-                   " * @export {this.is.a.long.path.to.a.Type}\n"
-                   " */",
-                   getGoogleJSStyleWithColumns(20)));
+  verifyFormat("/**\n"
+               " * @export {this.is.a.long.path.to.a.Type}\n"
+               " */",
+               "/**\n"
+               " * @export {this.is.a.long.path.to.a.Type}\n"
+               " */",
+               getGoogleJSStyleWithColumns(20));
 }
 
 TEST_F(FormatTestJS, RequoteStringsSingle) {
-  EXPECT_EQ("var x = 'foo';", format("var x = \"foo\";"));
-  EXPECT_EQ("var x = 'fo\\'o\\'';", format("var x = \"fo'o'\";"));
-  EXPECT_EQ("var x = 'fo\\'o\\'';", format("var x = \"fo\\'o'\";"));
-  EXPECT_EQ("var x =\n"
-            "    'foo\\'';",
-            // Code below is 15 chars wide, doesn't fit into the line with the
-            // \ escape added.
-            format("var x = \"foo'\";", getGoogleJSStyleWithColumns(15)));
+  verifyFormat("var x = 'foo';", "var x = \"foo\";");
+  verifyFormat("var x = 'fo\\'o\\'';", "var x = \"fo'o'\";");
+  verifyFormat("var x = 'fo\\'o\\'';", "var x = \"fo\\'o'\";");
+  verifyFormat(
+      "var x =\n"
+      "    'foo\\'';",
+      // Code below is 15 chars wide, doesn't fit into the line with the
+      // \ escape added.
+      "var x = \"foo'\";", getGoogleJSStyleWithColumns(15));
   // Removes no-longer needed \ escape from ".
-  EXPECT_EQ("var x = 'fo\"o';", format("var x = \"fo\\\"o\";"));
+  verifyFormat("var x = 'fo\"o';", "var x = \"fo\\\"o\";");
   // Code below fits into 15 chars *after* removing the \ escape.
-  EXPECT_EQ("var x = 'fo\"o';",
-            format("var x = \"fo\\\"o\";", getGoogleJSStyleWithColumns(15)));
+  verifyFormat("var x = 'fo\"o';", "var x = \"fo\\\"o\";",
+               getGoogleJSStyleWithColumns(15));
 }
 
 TEST_F(FormatTestJS, RequoteStringsDouble) {
   FormatStyle DoubleQuotes = getGoogleStyle(FormatStyle::LK_JavaScript);
   DoubleQuotes.JavaScriptQuotes = FormatStyle::JSQS_Double;
   verifyFormat("var x = \"foo\";", DoubleQuotes);
-  EXPECT_EQ("var x = \"foo\";", format("var x = 'foo';", DoubleQuotes));
-  EXPECT_EQ("var x = \"fo'o\";", format("var x = 'fo\\'o';", DoubleQuotes));
+  verifyFormat("var x = \"foo\";", "var x = 'foo';", DoubleQuotes);
+  verifyFormat("var x = \"fo'o\";", "var x = 'fo\\'o';", DoubleQuotes);
 }
 
 TEST_F(FormatTestJS, RequoteStringsLeave) {




More information about the cfe-commits mailing list