r195961 - Added LanguageStandard::LS_JavaScript to gate all JS-specific parsing.

Alexander Kornienko alexfh at google.com
Fri Nov 29 07:19:43 PST 2013


Author: alexfh
Date: Fri Nov 29 09:19:43 2013
New Revision: 195961

URL: http://llvm.org/viewvc/llvm-project?rev=195961&view=rev
Log:
Added FormatStyle::LanguageKind (LK_Cpp, LK_JavaScript) to gate all JS-specific parsing.

Summary:
Use LK_JavaScript for files ending with ".js". Added support for the ">>>="
operator.

Reviewers: djasper, klimek

Reviewed By: djasper

CC: cfe-commits, klimek

Differential Revision: http://llvm-reviews.chandlerc.com/D2242

Added:
    cfe/trunk/test/Format/language-detection.cpp
Modified:
    cfe/trunk/include/clang/Format/Format.h
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/include/clang/Format/Format.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Format/Format.h?rev=195961&r1=195960&r2=195961&view=diff
==============================================================================
--- cfe/trunk/include/clang/Format/Format.h (original)
+++ cfe/trunk/include/clang/Format/Format.h Fri Nov 29 09:19:43 2013
@@ -30,6 +30,21 @@ namespace format {
 /// \brief The \c FormatStyle is used to configure the formatting to follow
 /// specific guidelines.
 struct FormatStyle {
+  /// \brief Supported languages. When stored in a configuration file, specifies
+  /// the language that the configuration targets. When passed to the
+  /// reformat() function, enables syntax features specific to the language.
+  enum LanguageKind {
+    /// Do not use.
+    LK_None,
+    /// Should be used for C, C++, ObjectiveC, ObjectiveC++.
+    LK_Cpp,
+    /// Should be used for JavaScript.
+    LK_JavaScript
+  };
+
+  /// \brief Language this format style is targeted at.
+  LanguageKind Language;
+
   /// \brief The column limit.
   ///
   /// A column limit of \c 0 means that there is no column limit. In this case,
@@ -283,7 +298,7 @@ struct FormatStyle {
            IndentCaseLabels == R.IndentCaseLabels &&
            IndentFunctionDeclarationAfterType ==
                R.IndentFunctionDeclarationAfterType &&
-           IndentWidth == R.IndentWidth &&
+           IndentWidth == R.IndentWidth && Language == R.Language &&
            MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep &&
            NamespaceIndentation == R.NamespaceIndentation &&
            ObjCSpaceBeforeProtocolList == R.ObjCSpaceBeforeProtocolList &&

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=195961&r1=195960&r2=195961&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Fri Nov 29 09:19:43 2013
@@ -34,6 +34,15 @@
 namespace llvm {
 namespace yaml {
 template <>
+struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageKind> {
+  static void enumeration(IO &IO,
+                          clang::format::FormatStyle::LanguageKind &Value) {
+    IO.enumCase(Value, "Cpp", clang::format::FormatStyle::LK_Cpp);
+    IO.enumCase(Value, "JavaScript", clang::format::FormatStyle::LK_JavaScript);
+  }
+};
+
+template <>
 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
   static void enumeration(IO &IO,
                           clang::format::FormatStyle::LanguageStandard &Value) {
@@ -99,13 +108,17 @@ template <> struct MappingTraits<clang::
     } else {
       StringRef BasedOnStyle;
       IO.mapOptional("BasedOnStyle", BasedOnStyle);
-      if (!BasedOnStyle.empty())
+      if (!BasedOnStyle.empty()) {
+        clang::format::FormatStyle::LanguageKind Language = Style.Language;
         if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) {
           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
           return;
         }
+        Style.Language = Language;
+      }
     }
 
+    IO.mapOptional("Language", Style.Language);
     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
     IO.mapOptional("ConstructorInitializerIndentWidth",
                    Style.ConstructorInitializerIndentWidth);
@@ -173,6 +186,36 @@ template <> struct MappingTraits<clang::
     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
   }
 };
+
+// Allows reading vector<FormatStyle> while keeping default values.
+// Elements will be written or read starting from the 1st element.
+// When writing, the 0th element is ignored.
+// When reading, keys that are not present in the serialized form will be
+// copied from the 0th element of the vector. If the first element had no
+// Language specified, it will be treated as the default one for the following
+// elements.
+template <>
+struct DocumentListTraits<std::vector<clang::format::FormatStyle> > {
+  static size_t size(IO &io, std::vector<clang::format::FormatStyle> &Seq) {
+    return Seq.size() - 1;
+  }
+  static clang::format::FormatStyle &
+  element(IO &io, std::vector<clang::format::FormatStyle> &Seq, size_t Index) {
+    if (Index + 2 > Seq.size()) {
+      assert(Index + 2 == Seq.size() + 1);
+      clang::format::FormatStyle Template;
+      if (Seq.size() > 1 &&
+          Seq[1].Language == clang::format::FormatStyle::LK_None) {
+        Template = Seq[1];
+      } else {
+        Template = Seq[0];
+        Template.Language = clang::format::FormatStyle::LK_None;
+      }
+      Seq.resize(Index + 2, Template);
+    }
+    return Seq[Index + 1];
+  }
+};
 }
 }
 
@@ -188,6 +231,7 @@ void setDefaultPenalties(FormatStyle &St
 
 FormatStyle getLLVMStyle() {
   FormatStyle LLVMStyle;
+  LLVMStyle.Language = FormatStyle::LK_Cpp;
   LLVMStyle.AccessModifierOffset = -2;
   LLVMStyle.AlignEscapedNewlinesLeft = false;
   LLVMStyle.AlignTrailingComments = true;
@@ -236,6 +280,7 @@ FormatStyle getLLVMStyle() {
 
 FormatStyle getGoogleStyle() {
   FormatStyle GoogleStyle;
+  GoogleStyle.Language = FormatStyle::LK_Cpp;
   GoogleStyle.AccessModifierOffset = -1;
   GoogleStyle.AlignEscapedNewlinesLeft = true;
   GoogleStyle.AlignTrailingComments = true;
@@ -337,11 +382,42 @@ bool getPredefinedStyle(StringRef Name,
 }
 
 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
+  assert(Style);
+  assert(Style->Language != FormatStyle::LK_None);
   if (Text.trim().empty())
     return llvm::make_error_code(llvm::errc::invalid_argument);
+
+  std::vector<FormatStyle> Styles;
+  // DocumentListTraits<vector<FormatStyle>> uses the 0th element as the
+  // default for fields whose keys are missing from the configuration.
+  Styles.push_back(*Style);
   llvm::yaml::Input Input(Text);
-  Input >> *Style;
-  return Input.error();
+  Input >> Styles;
+  if (Input.error())
+    return Input.error();
+
+  for (unsigned i = 1; i < Styles.size(); ++i) {
+    // Ensure that only the first configuration can skip the Language option.
+    if (Styles[i].Language == FormatStyle::LK_None && i != 1)
+      return llvm::make_error_code(llvm::errc::invalid_argument);
+    // Ensure that each language is configured at most once.
+    for (unsigned j = 1; j < i; ++j) {
+      if (Styles[i].Language == Styles[j].Language)
+        return llvm::make_error_code(llvm::errc::invalid_argument);
+    }
+  }
+  // Look for a suitable configuration starting from the end, so we can
+  // find the configuration for the specific language first, and the default
+  // configuration (which can only be at slot 1) after it.
+  for (unsigned i = Styles.size() - 1; i > 0; --i) {
+    if (Styles[i].Language == Styles[0].Language ||
+        Styles[i].Language == FormatStyle::LK_None) {
+      *Style = Styles[i];
+      Style->Language = Styles[0].Language;
+      return llvm::make_error_code(llvm::errc::success);
+    }
+  }
+  return llvm::make_error_code(llvm::errc::not_supported);
 }
 
 std::string configurationAsText(const FormatStyle &Style) {
@@ -986,24 +1062,44 @@ public:
 
 private:
   void tryMergePreviousTokens() {
-    tryMerge_TMacro() || tryMergeJavaScriptIdentityOperators();
+    if (tryMerge_TMacro())
+      return;
+
+    if (Style.Language == FormatStyle::LK_JavaScript) {
+      static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
+      static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
+      static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
+                                               tok::greaterequal };
+      // FIXME: We probably need to change token type to mimic operator with the
+      // correct priority.
+      if (tryMergeTokens(JSIdentity))
+        return;
+      if (tryMergeTokens(JSNotIdentity))
+        return;
+      if (tryMergeTokens(JSShiftEqual))
+        return;
+    }
   }
 
-  bool tryMergeJavaScriptIdentityOperators() {
-    if (Tokens.size() < 2)
-      return false;
-    FormatToken &First = *Tokens[Tokens.size() - 2];
-    if (!First.isOneOf(tok::exclaimequal, tok::equalequal))
+  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
+    if (Tokens.size() < Kinds.size())
       return false;
-    FormatToken &Second = *Tokens.back();
-    if (!Second.is(tok::equal))
-      return false;
-    if (Second.WhitespaceRange.getBegin() != Second.WhitespaceRange.getEnd())
+
+    SmallVectorImpl<FormatToken *>::const_iterator First =
+        Tokens.end() - Kinds.size();
+    if (!First[0]->is(Kinds[0]))
       return false;
-    First.TokenText =
-        StringRef(First.TokenText.data(), First.TokenText.size() + 1);
-    First.ColumnWidth += 1;
-    Tokens.pop_back();
+    unsigned AddLength = 0;
+    for (unsigned i = 1; i < Kinds.size(); ++i) {
+      if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
+                                         First[i]->WhitespaceRange.getEnd())
+        return false;
+      AddLength += First[i]->TokenText.size();
+    }
+    Tokens.resize(Tokens.size() - Kinds.size() + 1);
+    First[0]->TokenText = StringRef(First[0]->TokenText.data(),
+                                    First[0]->TokenText.size() + AddLength);
+    First[0]->ColumnWidth += AddLength;
     return true;
   }
 
@@ -1201,6 +1297,17 @@ private:
   }
 };
 
+static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
+  switch (Language) {
+  case FormatStyle::LK_Cpp:
+    return "C++";
+  case FormatStyle::LK_JavaScript:
+    return "JavaScript";
+  default:
+    return "Unknown";
+  }
+}
+
 class Formatter : public UnwrappedLineConsumer {
 public:
   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
@@ -1213,6 +1320,8 @@ public:
                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
                                                                : "unknown")
                        << "\n");
+    DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
+                       << "\n");
   }
 
   tooling::Replacements format() {
@@ -1538,11 +1647,26 @@ const char *StyleOptionHelpDescription =
     "parameters, e.g.:\n"
     "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
 
+static void fillLanguageByFileName(StringRef FileName, FormatStyle *Style) {
+  if (FileName.endswith_lower(".c") || FileName.endswith_lower(".h") ||
+      FileName.endswith_lower(".cpp") || FileName.endswith_lower(".hpp") ||
+      FileName.endswith_lower(".cc") || FileName.endswith_lower(".hh") ||
+      FileName.endswith_lower(".cxx") || FileName.endswith_lower(".hxx") ||
+      FileName.endswith_lower(".m") || FileName.endswith_lower(".mm")) {
+    Style->Language = FormatStyle::LK_Cpp;
+  }
+  if (FileName.endswith_lower(".js")) {
+    Style->Language = FormatStyle::LK_JavaScript;
+  }
+}
+
 FormatStyle getStyle(StringRef StyleName, StringRef FileName) {
+  // FIXME: Configure fallback style from outside (add a command line option).
   // Fallback style in case the rest of this function can't determine a style.
   StringRef FallbackStyle = "LLVM";
   FormatStyle Style;
   getPredefinedStyle(FallbackStyle, &Style);
+  fillLanguageByFileName(FileName, &Style);
 
   if (StyleName.startswith("{")) {
     // Parse YAML/JSON style from the command line.
@@ -1557,9 +1681,11 @@ FormatStyle getStyle(StringRef StyleName
     if (!getPredefinedStyle(StyleName, &Style))
       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
                    << " style\n";
+    fillLanguageByFileName(FileName, &Style);
     return Style;
   }
 
+  SmallString<128> UnsuitableConfigFiles;
   SmallString<128> Path(FileName);
   llvm::sys::fs::make_absolute(Path);
   for (StringRef Directory = Path; !Directory.empty();
@@ -1591,8 +1717,14 @@ FormatStyle getStyle(StringRef StyleName
         continue;
       }
       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
-        llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
-                     << "\n";
+        if (ec == llvm::errc::not_supported) {
+          if (!UnsuitableConfigFiles.empty())
+            UnsuitableConfigFiles.append(", ");
+          UnsuitableConfigFiles.append(ConfigFile);
+        } else {
+          llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
+                       << "\n";
+        }
         continue;
       }
       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
@@ -1601,6 +1733,11 @@ FormatStyle getStyle(StringRef StyleName
   }
   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
                << " style\n";
+  if (!UnsuitableConfigFiles.empty()) {
+    llvm::errs() << "Configuration file(s) do(es) not support "
+                 << getLanguageName(Style.Language) << ": "
+                 << UnsuitableConfigFiles << "\n";
+  }
   return Style;
 }
 

Added: cfe/trunk/test/Format/language-detection.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Format/language-detection.cpp?rev=195961&view=auto
==============================================================================
--- cfe/trunk/test/Format/language-detection.cpp (added)
+++ cfe/trunk/test/Format/language-detection.cpp Fri Nov 29 09:19:43 2013
@@ -0,0 +1,7 @@
+// RUN: grep -Ev "// *[A-Z0-9_]+:" %s > %t.js
+// RUN: grep -Ev "// *[A-Z0-9_]+:" %s > %t.cpp
+// RUN: clang-format -style=llvm %t.js | FileCheck -strict-whitespace -check-prefix=CHECK1 %s
+// RUN: clang-format -style=llvm %t.cpp | FileCheck -strict-whitespace -check-prefix=CHECK2 %s
+// CHECK1: {{^a >>>= b;$}}
+// CHECK2: {{^a >> >= b;$}}
+a >>>= b;

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=195961&r1=195960&r2=195961&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Nov 29 09:19:43 2013
@@ -6939,8 +6939,6 @@ TEST_F(FormatTest, GetsPredefinedStyleBy
   EXPECT_FALSE(getPredefinedStyle("qwerty", &Styles[0]));
 }
 
-TEST_F(FormatTest, ParsesConfiguration) {
-  FormatStyle Style = {};
 #define CHECK_PARSE(TEXT, FIELD, VALUE)                                        \
   EXPECT_NE(VALUE, Style.FIELD);                                               \
   EXPECT_EQ(0, parseConfiguration(TEXT, &Style).value());                      \
@@ -6953,6 +6951,9 @@ TEST_F(FormatTest, ParsesConfiguration)
   EXPECT_EQ(0, parseConfiguration(#FIELD ": false", &Style).value());          \
   EXPECT_FALSE(Style.FIELD);
 
+TEST_F(FormatTest, ParsesConfiguration) {
+  FormatStyle Style = {};
+  Style.Language = FormatStyle::LK_Cpp;
   CHECK_PARSE_BOOL(AlignEscapedNewlinesLeft);
   CHECK_PARSE_BOOL(AlignTrailingComments);
   CHECK_PARSE_BOOL(AllowAllParametersOfDeclarationOnNextLine);
@@ -7027,15 +7028,117 @@ TEST_F(FormatTest, ParsesConfiguration)
               FormatStyle::NI_Inner);
   CHECK_PARSE("NamespaceIndentation: All", NamespaceIndentation,
               FormatStyle::NI_All);
+}
+
+TEST_F(FormatTest, ParsesConfigurationWithLanguages) {
+  FormatStyle Style = {};
+  Style.Language = FormatStyle::LK_Cpp;
+  CHECK_PARSE("Language: Cpp\n"
+              "IndentWidth: 12",
+              IndentWidth, 12u);
+  EXPECT_EQ(llvm::errc::not_supported,
+            parseConfiguration("Language: JavaScript\n"
+                               "IndentWidth: 34",
+                               &Style));
+  EXPECT_EQ(12u, Style.IndentWidth);
+  CHECK_PARSE("IndentWidth: 56", IndentWidth, 56u);
+  EXPECT_EQ(FormatStyle::LK_Cpp, Style.Language);
+
+  Style.Language = FormatStyle::LK_JavaScript;
+  CHECK_PARSE("Language: JavaScript\n"
+              "IndentWidth: 12",
+              IndentWidth, 12u);
+  CHECK_PARSE("IndentWidth: 23", IndentWidth, 23u);
+  EXPECT_EQ(llvm::errc::not_supported, parseConfiguration("Language: Cpp\n"
+                                                          "IndentWidth: 34",
+                                                          &Style));
+  EXPECT_EQ(23u, Style.IndentWidth);
+  CHECK_PARSE("IndentWidth: 56", IndentWidth, 56u);
+  EXPECT_EQ(FormatStyle::LK_JavaScript, Style.Language);
+
+  CHECK_PARSE("BasedOnStyle: LLVM\n"
+              "IndentWidth: 67",
+              IndentWidth, 67u);
+
+  CHECK_PARSE("---\n"
+              "Language: JavaScript\n"
+              "IndentWidth: 12\n"
+              "---\n"
+              "Language: Cpp\n"
+              "IndentWidth: 34\n"
+              "...\n",
+              IndentWidth, 12u);
+
+  Style.Language = FormatStyle::LK_Cpp;
+  CHECK_PARSE("---\n"
+              "Language: JavaScript\n"
+              "IndentWidth: 12\n"
+              "---\n"
+              "Language: Cpp\n"
+              "IndentWidth: 34\n"
+              "...\n",
+              IndentWidth, 34u);
+  CHECK_PARSE("---\n"
+              "IndentWidth: 78\n"
+              "---\n"
+              "Language: JavaScript\n"
+              "IndentWidth: 56\n"
+              "...\n",
+              IndentWidth, 78u);
+
+  Style.ColumnLimit = 123;
+  Style.IndentWidth = 234;
+  Style.BreakBeforeBraces = FormatStyle::BS_Linux;
+  Style.TabWidth = 345;
+  EXPECT_EQ(llvm::errc::success,
+            parseConfiguration("---\n"
+                               "IndentWidth: 456\n"
+                               "BreakBeforeBraces: Allman\n"
+                               "---\n"
+                               "Language: JavaScript\n"
+                               "IndentWidth: 111\n"
+                               "TabWidth: 111\n"
+                               "---\n"
+                               "Language: Cpp\n"
+                               "BreakBeforeBraces: Stroustrup\n"
+                               "TabWidth: 789\n"
+                               "...\n",
+                               &Style));
+  EXPECT_EQ(123u, Style.ColumnLimit);
+  EXPECT_EQ(456u, Style.IndentWidth);
+  EXPECT_EQ(FormatStyle::BS_Stroustrup, Style.BreakBeforeBraces);
+  EXPECT_EQ(789u, Style.TabWidth);
+
+
+  EXPECT_EQ(llvm::errc::invalid_argument,
+            parseConfiguration("---\n"
+                               "Language: JavaScript\n"
+                               "IndentWidth: 56\n"
+                               "---\n"
+                               "IndentWidth: 78\n"
+                               "...\n",
+                               &Style));
+  EXPECT_EQ(llvm::errc::invalid_argument,
+            parseConfiguration("---\n"
+                               "Language: JavaScript\n"
+                               "IndentWidth: 56\n"
+                               "---\n"
+                               "Language: JavaScript\n"
+                               "IndentWidth: 78\n"
+                               "...\n",
+                               &Style));
+
+  EXPECT_EQ(FormatStyle::LK_Cpp, Style.Language);
+}
 
 #undef CHECK_PARSE
 #undef CHECK_PARSE_BOOL
-}
 
 TEST_F(FormatTest, ConfigurationRoundTripTest) {
   FormatStyle Style = getLLVMStyle();
   std::string YAML = configurationAsText(Style);
   FormatStyle ParsedStyle = {};
+  ParsedStyle.Language = FormatStyle::LK_Cpp;
   EXPECT_EQ(0, parseConfiguration(YAML, &ParsedStyle).value());
   EXPECT_EQ(Style, ParsedStyle);
 }
@@ -7457,18 +7560,36 @@ TEST_F(FormatTest, SpacesInAngles) {
 }
 
 TEST_F(FormatTest, UnderstandsJavaScript) {
-  verifyFormat("a == = b;");
-  verifyFormat("a != = b;");
-
-  verifyFormat("a === b;");
-  verifyFormat("aaaaaaa ===\n    b;", getLLVMStyleWithColumns(10));
-  verifyFormat("a !== b;");
-  verifyFormat("aaaaaaa !==\n    b;", getLLVMStyleWithColumns(10));
+  FormatStyle JS = getLLVMStyle();
+  FormatStyle JS10Columns = getLLVMStyleWithColumns(10);
+  FormatStyle JS20Columns = getLLVMStyleWithColumns(20);
+  JS.Language = JS10Columns.Language = JS20Columns.Language =
+      FormatStyle::LK_JavaScript;
+
+  verifyFormat("a == = b;", JS);
+  verifyFormat("a != = b;", JS);
+
+  verifyFormat("a === b;", JS);
+  verifyFormat("aaaaaaa ===\n    b;", JS10Columns);
+  verifyFormat("a !== b;", JS);
+  verifyFormat("aaaaaaa !==\n    b;", JS10Columns);
   verifyFormat("if (a + b + c +\n"
                "        d !==\n"
                "    e + f + g)\n"
                "  q();",
-               getLLVMStyleWithColumns(20));
+               JS20Columns);
+
+  verifyFormat("a >> >= b;", JS);
+
+  verifyFormat("a >>> b;", JS);
+  verifyFormat("aaaaaaa >>>\n    b;", JS10Columns);
+  verifyFormat("a >>>= b;", JS);
+  verifyFormat("aaaaaaa >>>=\n    b;", JS10Columns);
+  verifyFormat("if (a + b + c +\n"
+               "        d >>>\n"
+               "    e + f + g)\n"
+               "  q();",
+               JS20Columns);
 }
 
 } // end namespace tooling





More information about the cfe-commits mailing list