r206157 - Format code around VCS conflict markers.

Manuel Klimek klimek at google.com
Mon Apr 14 02:14:14 PDT 2014


Author: klimek
Date: Mon Apr 14 04:14:11 2014
New Revision: 206157

URL: http://llvm.org/viewvc/llvm-project?rev=206157&view=rev
Log:
Format code around VCS conflict markers.

clang-format now correctly formats code that contains conflict markers, for example:
  {
    int a;
    void f() {
      callme(some(parameter1,
  <<<<<<< text by the vcs
                  parameter2),
  ||||||| text by the vcs
                  parameter2),
             parameter3,
  ======= text by the vcs
                  parameter2, parameter3),
  >>>>>>> text by the vcs
             otherparameter);
    }
  }

Modified:
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/FormatToken.h
    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=206157&r1=206156&r2=206157&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Mon Apr 14 04:14:11 2014
@@ -1157,7 +1157,8 @@ public:
                    encoding::Encoding Encoding)
       : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
         TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
-        IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
+        IdentTable(getFormattingLangOpts()), Encoding(Encoding),
+        FirstInLineIndex(0) {
     Lex.SetKeepWhitespaceMode(true);
 
     for (const std::string& ForEachMacro : Style.ForEachMacros)
@@ -1167,9 +1168,12 @@ public:
 
   ArrayRef<FormatToken *> lex() {
     assert(Tokens.empty());
+    assert(FirstInLineIndex == 0);
     do {
       Tokens.push_back(getNextToken());
       tryMergePreviousTokens();
+      if (Tokens.back()->NewlinesBefore > 0)
+        FirstInLineIndex = Tokens.size() - 1;
     } while (Tokens.back()->Tok.isNot(tok::eof));
     return Tokens;
   }
@@ -1180,6 +1184,8 @@ private:
   void tryMergePreviousTokens() {
     if (tryMerge_TMacro())
       return;
+    if (tryMergeConflictMarkers())
+      return;
 
     if (Style.Language == FormatStyle::LK_JavaScript) {
       static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
@@ -1254,6 +1260,68 @@ private:
     return true;
   }
 
+  bool tryMergeConflictMarkers() {
+    if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
+      return false;
+
+    // Conflict lines look like:
+    // <marker> <text from the vcs>
+    // For example:
+    // >>>>>>> /file/in/file/system at revision 1234
+    //
+    // We merge all tokens in a line that starts with a conflict marker
+    // into a single token with a special token type that the unwrapped line
+    // parser will use to correctly rebuild the underlying code.
+
+    FileID ID;
+    // Get the position of the first token in the line.
+    unsigned FirstInLineOffset;
+    std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
+        Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
+    StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
+    // Calculate the offset of the start of the current line.
+    auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
+    if (LineOffset == StringRef::npos) {
+      LineOffset = 0;
+    } else {
+      ++LineOffset;
+    }
+
+    auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
+    StringRef LineStart;
+    if (FirstSpace == StringRef::npos) {
+      LineStart = Buffer.substr(LineOffset);
+    } else {
+      LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
+    }
+
+    TokenType Type = TT_Unknown;
+    if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
+      Type = TT_ConflictStart;
+    } else if (LineStart == "|||||||" || LineStart == "=======" ||
+               LineStart == "====") {
+      Type = TT_ConflictAlternative;
+    } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
+      Type = TT_ConflictEnd;
+    }
+
+    if (Type != TT_Unknown) {
+      FormatToken *Next = Tokens.back();
+
+      Tokens.resize(FirstInLineIndex + 1);
+      // We do not need to build a complete token here, as we will skip it
+      // during parsing anyway (as we must not touch whitespace around conflict
+      // markers).
+      Tokens.back()->Type = Type;
+      Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
+
+      Tokens.push_back(Next);
+      return true;
+    }
+
+    return false;
+  }
+
   FormatToken *getNextToken() {
     if (GreaterStashed) {
       // Create a synthesized second '>' token.
@@ -1401,6 +1469,8 @@ private:
   IdentifierTable IdentTable;
   encoding::Encoding Encoding;
   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
+  // Index (in 'Tokens') of the last token that starts a new line.
+  unsigned FirstInLineIndex;
   SmallVector<FormatToken *, 16> Tokens;
   SmallVector<IdentifierInfo*, 8> ForEachMacros;
 

Modified: cfe/trunk/lib/Format/FormatToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatToken.h?rev=206157&r1=206156&r2=206157&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatToken.h (original)
+++ cfe/trunk/lib/Format/FormatToken.h Mon Apr 14 04:14:11 2014
@@ -33,19 +33,22 @@ enum TokenType {
   TT_BlockComment,
   TT_CastRParen,
   TT_ConditionalExpr,
+  TT_ConflictAlternative,
+  TT_ConflictEnd,
+  TT_ConflictStart,
   TT_CtorInitializerColon,
   TT_CtorInitializerComma,
   TT_DesignatedInitializerPeriod,
   TT_DictLiteral,
-  TT_ImplicitStringLiteral,
-  TT_InlineASMColon,
-  TT_InheritanceColon,
   TT_FunctionLBrace,
   TT_FunctionTypeLParen,
+  TT_ImplicitStringLiteral,
+  TT_InheritanceColon,
+  TT_InlineASMColon,
   TT_LambdaLSquare,
   TT_LineComment,
-  TT_ObjCBlockLParen,
   TT_ObjCBlockLBrace,
+  TT_ObjCBlockLParen,
   TT_ObjCDecl,
   TT_ObjCForIn,
   TT_ObjCMethodExpr,

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=206157&r1=206156&r2=206157&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Mon Apr 14 04:14:11 2014
@@ -465,14 +465,14 @@ void UnwrappedLineParser::parsePPDirecti
   }
 }
 
-void UnwrappedLineParser::pushPPConditional() {
-  if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
+void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
+  if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
     PPStack.push_back(PP_Unreachable);
   else
     PPStack.push_back(PP_Conditional);
 }
 
-void UnwrappedLineParser::parsePPIf(bool IfDef) {
+void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
   ++PPBranchLevel;
   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
@@ -480,37 +480,22 @@ void UnwrappedLineParser::parsePPIf(bool
     PPLevelBranchCount.push_back(0);
   }
   PPChainBranchIndex.push(0);
-  nextToken();
-  bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
-                         StringRef(FormatTok->Tok.getLiteralData(),
-                                   FormatTok->Tok.getLength()) == "0") ||
-                        FormatTok->Tok.is(tok::kw_false);
-  if ((!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0) {
-    PPStack.push_back(PP_Unreachable);
-  } else {
-    pushPPConditional();
-  }
-  parsePPUnknown();
+  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
+  conditionalCompilationCondition(Unreachable || Skip);
 }
 
-void UnwrappedLineParser::parsePPElse() {
+void UnwrappedLineParser::conditionalCompilationAlternative() {
   if (!PPStack.empty())
     PPStack.pop_back();
   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
   if (!PPChainBranchIndex.empty())
     ++PPChainBranchIndex.top();
-  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
-      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()) {
-    PPStack.push_back(PP_Unreachable);
-  } else {
-    pushPPConditional();
-  }
-  parsePPUnknown();
+  conditionalCompilationCondition(
+      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
+      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
 }
 
-void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
-
-void UnwrappedLineParser::parsePPEndIf() {
+void UnwrappedLineParser::conditionalCompilationEnd() {
   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
@@ -524,6 +509,27 @@ void UnwrappedLineParser::parsePPEndIf()
     PPChainBranchIndex.pop();
   if (!PPStack.empty())
     PPStack.pop_back();
+}
+
+void UnwrappedLineParser::parsePPIf(bool IfDef) {
+  nextToken();
+  bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
+                         StringRef(FormatTok->Tok.getLiteralData(),
+                                   FormatTok->Tok.getLength()) == "0") ||
+                        FormatTok->Tok.is(tok::kw_false);
+  conditionalCompilationStart(!IfDef && IsLiteralFalse);
+  parsePPUnknown();
+}
+
+void UnwrappedLineParser::parsePPElse() {
+  conditionalCompilationAlternative();
+  parsePPUnknown();
+}
+
+void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
+
+void UnwrappedLineParser::parsePPEndIf() {
+  conditionalCompilationEnd();
   parsePPUnknown();
 }
 
@@ -1406,6 +1412,19 @@ void UnwrappedLineParser::readToken() {
       flushComments(isOnNewLine(*FormatTok));
       parsePPDirective();
     }
+    while (FormatTok->Type == TT_ConflictStart ||
+           FormatTok->Type == TT_ConflictEnd ||
+           FormatTok->Type == TT_ConflictAlternative) {
+      if (FormatTok->Type == TT_ConflictStart) {
+        conditionalCompilationStart(/*Unreachable=*/false);
+      } else if (FormatTok->Type == TT_ConflictAlternative) {
+        conditionalCompilationAlternative();
+      } else if(FormatTok->Type == TT_ConflictEnd) {
+        conditionalCompilationEnd();
+      }
+      FormatTok = Tokens->getNextToken();
+      FormatTok->MustBreakBefore = true;
+    }
 
     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
         !Line->InPPDirective) {

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=206157&r1=206156&r2=206157&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.h Mon Apr 14 04:14:11 2014
@@ -107,7 +107,16 @@ private:
   void flushComments(bool NewlineBeforeNext);
   void pushToken(FormatToken *Tok);
   void calculateBraceTypes();
-  void pushPPConditional();
+
+  // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
+  // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
+  // this branch either cannot be taken (for example '#if false'), or should
+  // not be taken in this round.
+  void conditionalCompilationCondition(bool Unreachable);
+  void conditionalCompilationStart(bool Unreachable);
+  void conditionalCompilationAlternative();
+  void conditionalCompilationEnd();
+
   bool isOnNewLine(const FormatToken& FormatTok);
 
   // FIXME: We are constantly running into bugs where Line.Level is incorrectly

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=206157&r1=206156&r2=206157&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Mon Apr 14 04:14:11 2014
@@ -8402,5 +8402,97 @@ TEST_F(FormatTest, HandleUnbalancedImpli
   EXPECT_EQ(code, format(code));
 }
 
+TEST_F(FormatTest, HandleConflictMarkers) {
+  // Git/SVN conflict markers.
+  EXPECT_EQ("int a;\n"
+            "void f() {\n"
+            "  callme(some(parameter1,\n"
+            "<<<<<<< text by the vcs\n"
+            "              parameter2),\n"
+            "||||||| text by the vcs\n"
+            "              parameter2),\n"
+            "         parameter3,\n"
+            "======= text by the vcs\n"
+            "              parameter2, parameter3),\n"
+            ">>>>>>> text by the vcs\n"
+            "         otherparameter);\n",
+            format("int a;\n"
+                   "void f() {\n"
+                   "  callme(some(parameter1,\n"
+                   "<<<<<<< text by the vcs\n"
+                   "  parameter2),\n"
+                   "||||||| text by the vcs\n"
+                   "  parameter2),\n"
+                   "  parameter3,\n"
+                   "======= text by the vcs\n"
+                   "  parameter2,\n"
+                   "  parameter3),\n"
+                   ">>>>>>> text by the vcs\n"
+                   "  otherparameter);\n"));
+
+  // Perforce markers.
+  EXPECT_EQ("void f() {\n"
+            "  function(\n"
+            ">>>> text by the vcs\n"
+            "      parameter,\n"
+            "==== text by the vcs\n"
+            "      parameter,\n"
+            "==== text by the vcs\n"
+            "      parameter,\n"
+            "<<<< text by the vcs\n"
+            "      parameter);\n",
+            format("void f() {\n"
+                   "  function(\n"
+                   ">>>> text by the vcs\n"
+                   "  parameter,\n"
+                   "==== text by the vcs\n"
+                   "  parameter,\n"
+                   "==== text by the vcs\n"
+                   "  parameter,\n"
+                   "<<<< text by the vcs\n"
+                   "  parameter);\n"));
+
+  EXPECT_EQ("<<<<<<<\n"
+            "|||||||\n"
+            "=======\n"
+            ">>>>>>>",
+            format("<<<<<<<\n"
+                   "|||||||\n"
+                   "=======\n"
+                   ">>>>>>>"));
+
+  EXPECT_EQ("<<<<<<<\n"
+            "|||||||\n"
+            "int i;\n"
+            "=======\n"
+            ">>>>>>>",
+            format("<<<<<<<\n"
+                   "|||||||\n"
+                   "int i;\n"
+                   "=======\n"
+                   ">>>>>>>"));
+
+  // FIXME: Handle parsing of macros around conflict markers correctly:
+  EXPECT_EQ("#define Macro \\\n"
+            "<<<<<<<\n"
+            "Something \\\n"
+            "|||||||\n"
+            "Else \\\n"
+            "=======\n"
+            "Other \\\n"
+            ">>>>>>>\n"
+            "End int i;\n",
+            format("#define Macro \\\n"
+                   "<<<<<<<\n"
+                   "  Something \\\n"
+                   "|||||||\n"
+                   "  Else \\\n"
+                   "=======\n"
+                   "  Other \\\n"
+                   ">>>>>>>\n"
+                   "  End\n"
+                   "int i;\n"));
+}
+
 } // end namespace tooling
 } // end namespace clang





More information about the cfe-commits mailing list