r262534 - clang-format: [JS] Optionally re-quote string literals.

Daniel Jasper via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 2 14:44:04 PST 2016


Author: djasper
Date: Wed Mar  2 16:44:03 2016
New Revision: 262534

URL: http://llvm.org/viewvc/llvm-project?rev=262534&view=rev
Log:
clang-format: [JS] Optionally re-quote string literals.

Turns "foo" into 'foo' (or vice versa, depending on configuration).
This makes it more convenient to follow the Google JavaScript style
guide:
https://google.github.io/styleguide/javascriptguide.xml?showone=Strings#Strings

This functionality is behind the option "JavaScriptQuotes", which can be:

  * "leave" (no re-quoting)
  * "single" (change to single quotes)
  * "double" (change to double quotes)

This also changes single quoted JavaScript string literals to be treated
as tok::string_literal, not tok::char_literal, which fixes two unrelated
tests.

Patch by Martin Probst. Thank you.

Modified:
    cfe/trunk/include/clang/Format/Format.h
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/TokenAnnotator.cpp
    cfe/trunk/unittests/Format/FormatTest.cpp
    cfe/trunk/unittests/Format/FormatTestJS.cpp

Modified: cfe/trunk/include/clang/Format/Format.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Format/Format.h?rev=262534&r1=262533&r2=262534&view=diff
==============================================================================
--- cfe/trunk/include/clang/Format/Format.h (original)
+++ cfe/trunk/include/clang/Format/Format.h Wed Mar  2 16:44:03 2016
@@ -594,6 +594,20 @@ struct FormatStyle {
   /// \brief The way to use tab characters in the resulting file.
   UseTabStyle UseTab;
 
+  /// \brief Quotation styles for JavaScript strings. Does not affect template
+  /// strings.
+  enum JavaScriptQuoteStyle {
+    /// Leave string quotes as they are.
+    JSQS_Leave,
+    /// Always use single quotes.
+    JSQS_Single,
+    /// Always use double quotes.
+    JSQS_Double
+  };
+
+  /// \brief The JavaScriptQuoteStyle to use for JavaScript strings.
+  JavaScriptQuoteStyle JavaScriptQuotes;
+
   bool operator==(const FormatStyle &R) const {
     return AccessModifierOffset == R.AccessModifierOffset &&
            AlignAfterOpenBracket == R.AlignAfterOpenBracket &&
@@ -670,7 +684,8 @@ struct FormatStyle {
            SpacesInParentheses == R.SpacesInParentheses &&
            SpacesInSquareBrackets == R.SpacesInSquareBrackets &&
            Standard == R.Standard && TabWidth == R.TabWidth &&
-           UseTab == R.UseTab;
+           UseTab == R.UseTab &&
+           JavaScriptQuotes == R.JavaScriptQuotes;
   }
 };
 

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=262534&r1=262533&r2=262534&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Wed Mar  2 16:44:03 2016
@@ -71,6 +71,14 @@ template <> struct ScalarEnumerationTrai
   }
 };
 
+template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
+  static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
+    IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
+    IO.enumCase(Value, "Single", FormatStyle::JSQS_Single);
+    IO.enumCase(Value, "Double", FormatStyle::JSQS_Double);
+  }
+};
+
 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
   static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
     IO.enumCase(Value, "None", FormatStyle::SFS_None);
@@ -335,6 +343,7 @@ template <> struct MappingTraits<FormatS
     IO.mapOptional("Standard", Style.Standard);
     IO.mapOptional("TabWidth", Style.TabWidth);
     IO.mapOptional("UseTab", Style.UseTab);
+    IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
   }
 };
 
@@ -522,6 +531,7 @@ FormatStyle getLLVMStyle() {
   LLVMStyle.SpacesBeforeTrailingComments = 1;
   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
   LLVMStyle.UseTab = FormatStyle::UT_Never;
+  LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
   LLVMStyle.ReflowComments = true;
   LLVMStyle.SpacesInParentheses = false;
   LLVMStyle.SpacesInSquareBrackets = false;
@@ -590,6 +600,7 @@ FormatStyle getGoogleStyle(FormatStyle::
     GoogleStyle.CommentPragmas = "@(export|return|see|visibility) ";
     GoogleStyle.MaxEmptyLinesToKeep = 3;
     GoogleStyle.SpacesInContainerLiterals = false;
+    GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single;
   } else if (Language == FormatStyle::LK_Proto) {
     GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
     GoogleStyle.SpacesInContainerLiterals = false;
@@ -766,13 +777,13 @@ namespace {
 class FormatTokenLexer {
 public:
   FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
-                   encoding::Encoding Encoding)
+                   encoding::Encoding Encoding, tooling::Replacements &Replaces)
       : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
         LessStashed(false), Column(0), TrailingWhitespace(0),
         SourceMgr(SourceMgr), ID(ID), Style(Style),
         IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
-        Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
-        MacroBlockBeginRegex(Style.MacroBlockBegin),
+        Encoding(Encoding), Replaces(Replaces), FirstInLineIndex(0),
+        FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
         MacroBlockEndRegex(Style.MacroBlockEnd) {
     Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
                         getFormattingLangOpts(Style)));
@@ -791,6 +802,8 @@ public:
       if (Style.Language == FormatStyle::LK_JavaScript)
         tryParseJSRegexLiteral();
       tryMergePreviousTokens();
+      if (Style.Language == FormatStyle::LK_JavaScript)
+        tryRequoteJSStringLiteral();
       if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
         FirstInLineIndex = Tokens.size() - 1;
     } while (Tokens.back()->Tok.isNot(tok::eof));
@@ -1061,6 +1074,75 @@ private:
     return false;
   }
 
+  // If the last token is a double/single-quoted string literal, generates a
+  // replacement with a single/double quoted string literal, re-escaping the
+  // contents in the process.
+  void tryRequoteJSStringLiteral() {
+    if (Style.JavaScriptQuotes == FormatStyle::JSQS_Leave)
+      return;
+
+    FormatToken *FormatTok = Tokens.back();
+    StringRef Input = FormatTok->TokenText;
+    if (!FormatTok->isStringLiteral() ||
+        // NB: testing for not starting with a double quote to avoid breaking
+        // `template strings`.
+        (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
+         !Input.startswith("\"")) ||
+        (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
+         !Input.startswith("\'")))
+      return;
+
+    // Change start and end quote.
+    bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
+    SourceLocation Start = FormatTok->Tok.getLocation();
+    auto Replace = [&](SourceLocation Start, unsigned Length,
+                       StringRef ReplacementText) {
+      Replaces.insert(
+          tooling::Replacement(SourceMgr, Start, Length, ReplacementText));
+    };
+    Replace(Start, 1, IsSingle ? "'" : "\"");
+    Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
+            IsSingle ? "'" : "\"");
+
+    // Escape internal quotes.
+    size_t ColumnWidth = FormatTok->TokenText.size();
+    bool Escaped = false;
+    for (size_t i = 1; i < Input.size() - 1; i++) {
+      switch (Input[i]) {
+      case '\\':
+        if (!Escaped && i + 1 < Input.size() &&
+            ((IsSingle && Input[i + 1] == '"') ||
+             (!IsSingle && Input[i + 1] == '\''))) {
+          // Remove this \, it's escaping a " or ' that no longer needs escaping
+          ColumnWidth--;
+          Replace(Start.getLocWithOffset(i), 1, "");
+          continue;
+        }
+        Escaped = !Escaped;
+        break;
+      case '\"':
+      case '\'':
+        if (!Escaped && IsSingle == (Input[i] == '\'')) {
+          // Escape the quote.
+          Replace(Start.getLocWithOffset(i), 0, "\\");
+          ColumnWidth++;
+        }
+        Escaped = false;
+        break;
+      default:
+        Escaped = false;
+        break;
+      }
+    }
+
+    // For formatting, count the number of non-escaped single quotes in them
+    // and adjust ColumnWidth to take the added escapes into account.
+    // FIXME(martinprobst): this might conflict with code breaking a long string
+    // literal (which clang-format doesn't do, yet). For that to work, this code
+    // would have to modify TokenText directly.
+    FormatTok->ColumnWidth = ColumnWidth;
+  }
+
   bool tryMerge_TMacro() {
     if (Tokens.size() < 4)
       return false;
@@ -1359,6 +1441,7 @@ private:
   IdentifierTable IdentTable;
   AdditionalKeywords Keywords;
   encoding::Encoding Encoding;
+  tooling::Replacements &Replaces;
   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
   // Index (in 'Tokens') of the last token that starts a new line.
   unsigned FirstInLineIndex;
@@ -1382,10 +1465,15 @@ private:
         Tok.IsUnterminatedLiteral = true;
       } else if (Style.Language == FormatStyle::LK_JavaScript &&
                  Tok.TokenText == "''") {
-        Tok.Tok.setKind(tok::char_constant);
+        Tok.Tok.setKind(tok::string_literal);
       }
     }
 
+    if (Style.Language == FormatStyle::LK_JavaScript &&
+        Tok.is(tok::char_constant)) {
+      Tok.Tok.setKind(tok::string_literal);
+    }
+
     if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
                                  Tok.TokenText == "/* clang-format on */")) {
       FormattingDisabled = false;
@@ -1443,7 +1531,7 @@ public:
 
   tooling::Replacements format(bool *IncompleteFormat) {
     tooling::Replacements Result;
-    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
+    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding, Result);
 
     UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
                                *this);

Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=262534&r1=262533&r2=262534&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Wed Mar  2 16:44:03 2016
@@ -410,7 +410,7 @@ private:
                  Style.Language != FormatStyle::LK_Cpp)) ||
                Style.Language == FormatStyle::LK_Proto) &&
               (Previous->Tok.getIdentifierInfo() ||
-               Previous->is(tok::char_constant)))
+               Previous->is(tok::string_literal)))
             Previous->Type = TT_SelectorName;
           if (CurrentToken->is(tok::colon) ||
               Style.Language == FormatStyle::LK_JavaScript)
@@ -2170,8 +2170,8 @@ bool TokenAnnotator::mustBreakBefore(con
 
   if (Style.Language == FormatStyle::LK_JavaScript) {
     // FIXME: This might apply to other languages and token kinds.
-    if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
-        Left.Previous->is(tok::char_constant))
+    if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
+        Left.Previous->is(tok::string_literal))
       return true;
     if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
         Left.Previous && Left.Previous->is(tok::equal) &&

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=262534&r1=262533&r2=262534&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Mar  2 16:44:03 2016
@@ -6652,7 +6652,7 @@ TEST_F(FormatTest, FormatsBracedListsInC
                "                   bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb};");
 
   verifyNoCrash("a<,");
-  
+
   // No braced initializer here.
   verifyFormat("void f() {\n"
                "  struct Dummy {};\n"

Modified: cfe/trunk/unittests/Format/FormatTestJS.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestJS.cpp?rev=262534&r1=262533&r2=262534&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTestJS.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTestJS.cpp Wed Mar  2 16:44:03 2016
@@ -257,7 +257,7 @@ TEST_F(FormatTestJS, SpacesInContainerLi
   verifyFormat("f({'a': [{}]});");
 }
 
-TEST_F(FormatTestJS, SingleQuoteStrings) {
+TEST_F(FormatTestJS, SingleQuotedStrings) {
   verifyFormat("this.function('', true);");
 }
 
@@ -883,7 +883,7 @@ TEST_F(FormatTestJS, Modules) {
   verifyFormat("import {\n"
                "  X,\n"
                "  Y,\n"
-               "} from 'some/long/module.js';",
+               "} from\n    'some/long/module.js';",
                getGoogleJSStyleWithColumns(20));
   verifyFormat("import {X as myLocalX, Y as myLocalY} from 'some/module.js';");
   verifyFormat("import * as lib from 'some/module.js';");
@@ -1094,5 +1094,36 @@ TEST_F(FormatTestJS, JSDocAnnotations) {
                    getGoogleJSStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestJS, RequoteStringsSingle) {
+  EXPECT_EQ("var x = 'foo';", format("var x = \"foo\";"));
+  EXPECT_EQ("var x = 'fo\\'o\\'';", format("var x = \"fo'o'\";"));
+  EXPECT_EQ("var x = 'fo\\'o\\'';", format("var x = \"fo\\'o'\";"));
+  EXPECT_EQ("var x =\n"
+            "    'foo\\'';",
+            // Code below is 15 chars wide, doesn't fit into the line with the
+            // \ escape added.
+            format("var x = \"foo'\";", getGoogleJSStyleWithColumns(15)));
+  // Removes no-longer needed \ escape from ".
+  EXPECT_EQ("var x = 'fo\"o';", format("var x = \"fo\\\"o\";"));
+  // Code below fits into 15 chars *after* removing the \ escape.
+  EXPECT_EQ("var x = 'fo\"o';",
+            format("var x = \"fo\\\"o\";", getGoogleJSStyleWithColumns(15)));
+}
+
+TEST_F(FormatTestJS, RequoteStringsDouble) {
+  FormatStyle DoubleQuotes = getGoogleStyle(FormatStyle::LK_JavaScript);
+  DoubleQuotes.JavaScriptQuotes = FormatStyle::JSQS_Double;
+  verifyFormat("var x = \"foo\";", DoubleQuotes);
+  EXPECT_EQ("var x = \"foo\";", format("var x = 'foo';", DoubleQuotes));
+  EXPECT_EQ("var x = \"fo'o\";", format("var x = 'fo\\'o';", DoubleQuotes));
+}
+
+TEST_F(FormatTestJS, RequoteStringsLeave) {
+  FormatStyle LeaveQuotes = getGoogleStyle(FormatStyle::LK_JavaScript);
+  LeaveQuotes.JavaScriptQuotes = FormatStyle::JSQS_Leave;
+  verifyFormat("var x = \"foo\";", LeaveQuotes);
+  verifyFormat("var x = 'foo';", LeaveQuotes);
+}
+
 } // end namespace tooling
 } // end namespace clang




More information about the cfe-commits mailing list