r174504 - Optionally derive formatting information from the input file.

Daniel Jasper djasper at google.com
Wed Feb 6 06:22:41 PST 2013


Author: djasper
Date: Wed Feb  6 08:22:40 2013
New Revision: 174504

URL: http://llvm.org/viewvc/llvm-project?rev=174504&view=rev
Log:
Optionally derive formatting information from the input file.

With this patch, clang-format can analyze the input file for two
properties:
1. Whether "int *a" or "int* a" is more common.
2. Whether non-C++03 constructs are used, e.g. A<A<A>>.

With Google-style, clang-format will now use the more common style for
(1) and format in a C++03-compatible way, unless it finds C++11
constructs in the input.

Modified:
    cfe/trunk/include/clang/Format/Format.h
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/TokenAnnotator.cpp
    cfe/trunk/lib/Format/TokenAnnotator.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/include/clang/Format/Format.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Format/Format.h?rev=174504&r1=174503&r2=174504&view=diff
==============================================================================
--- cfe/trunk/include/clang/Format/Format.h (original)
+++ cfe/trunk/include/clang/Format/Format.h Wed Feb  6 08:22:40 2013
@@ -39,14 +39,26 @@ struct FormatStyle {
   unsigned MaxEmptyLinesToKeep;
 
   /// \brief Set whether & and * bind to the type as opposed to the variable.
-  bool PointerAndReferenceBindToType;
+  bool PointerBindsToType;
+
+  /// \brief If \c true, analyze the formatted file for the most common binding.
+  bool DerivePointerBinding;
 
   /// \brief The extra indent or outdent of access modifiers (e.g.: public:).
   int AccessModifierOffset;
 
-  /// \brief Split two consecutive closing '>' by a space, i.e. use
-  /// A<A<int> > instead of A<A<int>>.
-  bool SplitTemplateClosingGreater;
+  enum LanguageStandard {
+    LS_Cpp03,
+    LS_Cpp11,
+    LS_Auto
+  };
+
+  /// \brief Format compatible with this standard, e.g. use \c A<A<int> >
+  /// instead of \c A<A<int>> for LS_Cpp03.
+  LanguageStandard Standard;
+
+  /// \brief If \c true, analyze the formatted file for C++03 compatibility.
+  bool DeriveBackwardsCompatibility;
 
   /// \brief Indent case labels one level from the switch statement.
   ///

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=174504&r1=174503&r2=174504&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Wed Feb  6 08:22:40 2013
@@ -36,9 +36,10 @@ FormatStyle getLLVMStyle() {
   FormatStyle LLVMStyle;
   LLVMStyle.ColumnLimit = 80;
   LLVMStyle.MaxEmptyLinesToKeep = 1;
-  LLVMStyle.PointerAndReferenceBindToType = false;
+  LLVMStyle.PointerBindsToType = false;
+  LLVMStyle.DerivePointerBinding = false;
   LLVMStyle.AccessModifierOffset = -2;
-  LLVMStyle.SplitTemplateClosingGreater = true;
+  LLVMStyle.Standard = FormatStyle::LS_Cpp03;
   LLVMStyle.IndentCaseLabels = false;
   LLVMStyle.SpacesBeforeTrailingComments = 1;
   LLVMStyle.BinPackParameters = true;
@@ -55,9 +56,10 @@ FormatStyle getGoogleStyle() {
   FormatStyle GoogleStyle;
   GoogleStyle.ColumnLimit = 80;
   GoogleStyle.MaxEmptyLinesToKeep = 1;
-  GoogleStyle.PointerAndReferenceBindToType = true;
+  GoogleStyle.PointerBindsToType = true;
+  GoogleStyle.DerivePointerBinding = true;
   GoogleStyle.AccessModifierOffset = -1;
-  GoogleStyle.SplitTemplateClosingGreater = false;
+  GoogleStyle.Standard = FormatStyle::LS_Auto;
   GoogleStyle.IndentCaseLabels = true;
   GoogleStyle.SpacesBeforeTrailingComments = 2;
   GoogleStyle.BinPackParameters = false;
@@ -73,7 +75,8 @@ FormatStyle getGoogleStyle() {
 FormatStyle getChromiumStyle() {
   FormatStyle ChromiumStyle = getGoogleStyle();
   ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
-  ChromiumStyle.SplitTemplateClosingGreater = true;
+  ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
+  ChromiumStyle.DerivePointerBinding = false;
   return ChromiumStyle;
 }
 
@@ -838,14 +841,54 @@ public:
 
   virtual ~Formatter() {}
 
+  void deriveLocalStyle() {
+    unsigned CountBoundToVariable = 0;
+    unsigned CountBoundToType = 0;
+    bool HasCpp03IncompatibleFormat = false;
+    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+      if (AnnotatedLines[i].First.Children.empty())
+        continue;
+      AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0];
+      while (!Tok->Children.empty()) {
+        if (Tok->Type == TT_PointerOrReference) {
+          bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0;
+          bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0;
+          if (SpacesBefore && !SpacesAfter)
+            ++CountBoundToVariable;
+          else if (!SpacesBefore && SpacesAfter)
+            ++CountBoundToType;
+        }
+
+        if (Tok->Type == TT_TemplateCloser && Tok->Parent->Type ==
+            TT_TemplateCloser && Tok->FormatTok.WhiteSpaceLength == 0)
+          HasCpp03IncompatibleFormat = true;
+        Tok = &Tok->Children[0];
+      }
+    }
+    if (Style.DerivePointerBinding) {
+      if (CountBoundToType > CountBoundToVariable)
+        Style.PointerBindsToType = true;
+      else if (CountBoundToType < CountBoundToVariable)
+        Style.PointerBindsToType = false;
+    }
+    if (Style.Standard == FormatStyle::LS_Auto) {
+      Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
+                                                  : FormatStyle::LS_Cpp03;
+    }
+  }
+
   tooling::Replacements format() {
     LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
     UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
     StructuralError = Parser.parse();
     unsigned PreviousEndOfLineColumn = 0;
+    TokenAnnotator Annotator(Style, SourceMgr, Lex);
+    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+      Annotator.annotate(AnnotatedLines[i]);
+    }
+    deriveLocalStyle();
     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
-      TokenAnnotator Annotator(Style, SourceMgr, Lex, AnnotatedLines[i]);
-      Annotator.annotate();
+      Annotator.calculateFormattingInformation(AnnotatedLines[i]);
     }
     for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
                                               E = AnnotatedLines.end();

Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=174504&r1=174503&r2=174504&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Wed Feb  6 08:22:40 2013
@@ -678,7 +678,7 @@ private:
   bool KeywordVirtualFound;
 };
 
-void TokenAnnotator::annotate() {
+void TokenAnnotator::annotate(AnnotatedLine &Line) {
   AnnotatingParser Parser(SourceMgr, Lex, Line);
   Line.Type = Parser.parseLine();
   if (Line.Type == LT_Invalid)
@@ -696,45 +696,51 @@ void TokenAnnotator::annotate() {
   Line.First.CanBreakBefore = Line.First.MustBreakBefore;
 
   Line.First.TotalLength = Line.First.FormatTok.TokenLength;
-  if (!Line.First.Children.empty())
-    calculateFormattingInformation(Line.First.Children[0]);
 }
 
-void TokenAnnotator::calculateFormattingInformation(AnnotatedToken &Current) {
-  Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
+void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
+  if (Line.First.Children.empty())
+    return;
+  AnnotatedToken *Current = &Line.First.Children[0];
+  while (Current != NULL) {
+    Current->SpaceRequiredBefore = spaceRequiredBefore(Line, *Current);
+
+    if (Current->FormatTok.MustBreakBefore) {
+      Current->MustBreakBefore = true;
+    } else if (Current->Type == TT_LineComment) {
+      Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
+    } else if ((Current->Parent->is(tok::comment) &&
+                Current->FormatTok.NewlinesBefore > 0) ||
+               (Current->is(tok::string_literal) &&
+                Current->Parent->is(tok::string_literal))) {
+      Current->MustBreakBefore = true;
+    } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
+               Current->Parent->is(tok::string_literal) &&
+               Current->Children[0].is(tok::string_literal)) {
+      Current->MustBreakBefore = true;
+    } else {
+      Current->MustBreakBefore = false;
+    }
+    Current->CanBreakBefore =
+        Current->MustBreakBefore || canBreakBefore(Line, *Current);
+    if (Current->MustBreakBefore)
+      Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
+    else
+      Current->TotalLength =
+          Current->Parent->TotalLength + Current->FormatTok.TokenLength +
+          (Current->SpaceRequiredBefore ? 1 : 0);
+    // FIXME: Only calculate this if CanBreakBefore is true once static
+    // initializers etc. are sorted out.
+    // FIXME: Move magic numbers to a better place.
+    Current->SplitPenalty =
+        20 * Current->BindingStrength + splitPenalty(Line, *Current);
 
-  if (Current.FormatTok.MustBreakBefore) {
-    Current.MustBreakBefore = true;
-  } else if (Current.Type == TT_LineComment) {
-    Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
-  } else if ((Current.Parent->is(tok::comment) &&
-              Current.FormatTok.NewlinesBefore > 0) ||
-             (Current.is(tok::string_literal) &&
-              Current.Parent->is(tok::string_literal))) {
-    Current.MustBreakBefore = true;
-  } else if (Current.is(tok::lessless) && !Current.Children.empty() &&
-             Current.Parent->is(tok::string_literal) &&
-             Current.Children[0].is(tok::string_literal)) {
-    Current.MustBreakBefore = true;
-  } else {
-    Current.MustBreakBefore = false;
+    Current = Current->Children.empty() ? NULL : &Current->Children[0];
   }
-  Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
-  if (Current.MustBreakBefore)
-    Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
-  else
-    Current.TotalLength =
-        Current.Parent->TotalLength + Current.FormatTok.TokenLength +
-        (Current.SpaceRequiredBefore ? 1 : 0);
-  // FIXME: Only calculate this if CanBreakBefore is true once static
-  // initializers etc. are sorted out.
-  // FIXME: Move magic numbers to a better place.
-  Current.SplitPenalty = 20 * Current.BindingStrength + splitPenalty(Current);
-  if (!Current.Children.empty())
-    calculateFormattingInformation(Current.Children[0]);
 }
 
-unsigned TokenAnnotator::splitPenalty(const AnnotatedToken &Tok) {
+unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
+                                      const AnnotatedToken &Tok) {
   const AnnotatedToken &Left = *Tok.Parent;
   const AnnotatedToken &Right = Tok;
 
@@ -787,7 +793,8 @@ unsigned TokenAnnotator::splitPenalty(co
   return 3;
 }
 
-bool TokenAnnotator::spaceRequiredBetween(const AnnotatedToken &Left,
+bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
+                                          const AnnotatedToken &Left,
                                           const AnnotatedToken &Right) {
   if (Right.is(tok::hashhash))
     return Left.is(tok::hash);
@@ -818,10 +825,10 @@ bool TokenAnnotator::spaceRequiredBetwee
   if (Right.is(tok::amp) || Right.is(tok::star))
     return Left.FormatTok.Tok.isLiteral() ||
            (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
-            !Style.PointerAndReferenceBindToType);
+            !Style.PointerBindsToType);
   if (Left.is(tok::amp) || Left.is(tok::star))
     return Right.FormatTok.Tok.isLiteral() ||
-           Style.PointerAndReferenceBindToType;
+           Style.PointerBindsToType;
   if (Right.is(tok::star) && Left.is(tok::l_paren))
     return false;
   if (Left.is(tok::l_square) || Right.is(tok::r_square))
@@ -851,7 +858,8 @@ bool TokenAnnotator::spaceRequiredBetwee
   return true;
 }
 
-bool TokenAnnotator::spaceRequiredBefore(const AnnotatedToken &Tok) {
+bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
+                                         const AnnotatedToken &Tok) {
   if (Line.Type == LT_ObjCMethodDecl) {
     if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
         Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
@@ -892,7 +900,7 @@ bool TokenAnnotator::spaceRequiredBefore
             Tok.Parent->Type != TT_ObjCMethodExpr);
   if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
     return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
-           TT_TemplateCloser && Style.SplitTemplateClosingGreater;
+           TT_TemplateCloser && Style.Standard != FormatStyle::LS_Cpp11;
   }
   if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
     return true;
@@ -902,10 +910,11 @@ bool TokenAnnotator::spaceRequiredBefore
     return true;
   if (Tok.Type == TT_TrailingUnaryOperator)
     return false;
-  return spaceRequiredBetween(*Tok.Parent, Tok);
+  return spaceRequiredBetween(Line, *Tok.Parent, Tok);
 }
 
-bool TokenAnnotator::canBreakBefore(const AnnotatedToken &Right) {
+bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
+                                    const AnnotatedToken &Right) {
   const AnnotatedToken &Left = *Right.Parent;
   if (Line.Type == LT_ObjCMethodDecl) {
     if (Right.is(tok::identifier) && !Right.Children.empty() &&

Modified: cfe/trunk/lib/Format/TokenAnnotator.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.h?rev=174504&r1=174503&r2=174504&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.h (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.h Wed Feb  6 08:22:40 2013
@@ -171,29 +171,29 @@ inline prec::Level getPrecedence(const A
 /// \c UnwrappedLine.
 class TokenAnnotator {
 public:
-  TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
-                 AnnotatedLine &Line)
-      : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {
+  TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex)
+      : Style(Style), SourceMgr(SourceMgr), Lex(Lex) {
   }
 
-  void annotate();
-  void calculateFormattingInformation(AnnotatedToken &Current);
+  void annotate(AnnotatedLine &Line);
+  void calculateFormattingInformation(AnnotatedLine &Line);
 
 private:
   /// \brief Calculate the penalty for splitting before \c Tok.
-  unsigned splitPenalty(const AnnotatedToken &Tok);
+  unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
 
-  bool spaceRequiredBetween(const AnnotatedToken &Left,
+  bool spaceRequiredBetween(const AnnotatedLine &Line,
+                            const AnnotatedToken &Left,
                             const AnnotatedToken &Right);
 
-  bool spaceRequiredBefore(const AnnotatedToken &Tok);
+  bool spaceRequiredBefore(const AnnotatedLine &Line,
+                           const AnnotatedToken &Tok);
 
-  bool canBreakBefore(const AnnotatedToken &Right);
+  bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
 
-  FormatStyle Style;
+  const FormatStyle &Style;
   SourceManager &SourceMgr;
   Lexer &Lex;
-  AnnotatedLine &Line;
 };
 
 } // end namespace format

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=174504&r1=174503&r2=174504&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Feb  6 08:22:40 2013
@@ -1437,6 +1437,11 @@ TEST_F(FormatTest, UnderstandsTemplatePa
   verifyGoogleFormat("A<A<int>> a;");
   verifyGoogleFormat("A<A<A<int>>> a;");
   verifyGoogleFormat("A<A<A<A<int>>>> a;");
+  verifyGoogleFormat("A<A<int> > a;");
+  verifyGoogleFormat("A<A<A<int> > > a;");
+  verifyGoogleFormat("A<A<A<A<int> > > > a;");
+  EXPECT_EQ("A<A<A<A>>> a;", format("A<A<A<A> >> a;", getGoogleStyle()));
+  EXPECT_EQ("A<A<A<A>>> a;", format("A<A<A<A>> > a;", getGoogleStyle()));
 
   verifyFormat("test >> a >> b;");
   verifyFormat("test << a >> b;");
@@ -1597,6 +1602,22 @@ TEST_F(FormatTest, UnderstandsUsesOfStar
 
   verifyIndependentOfContext("A = new SomeType *[Length]();");
   verifyGoogleFormat("A = new SomeType* [Length]();");
+
+  EXPECT_EQ("int *a;\n"
+            "int *a;\n"
+            "int *a;", format("int *a;\n"
+                              "int* a;\n"
+                              "int *a;", getGoogleStyle()));
+  EXPECT_EQ("int* a;\n"
+            "int* a;\n"
+            "int* a;", format("int* a;\n"
+                              "int* a;\n"
+                              "int *a;", getGoogleStyle()));
+  EXPECT_EQ("int *a;\n"
+            "int *a;\n"
+            "int *a;", format("int *a;\n"
+                              "int * a;\n"
+                              "int *  a;", getGoogleStyle()));
 }
 
 TEST_F(FormatTest, FormatsBinaryOperatorsPrecedingEquals) {





More information about the cfe-commits mailing list