r206157 - Format code around VCS conflict markers.
Manuel Klimek
klimek at google.com
Mon Apr 14 02:14:14 PDT 2014
Author: klimek
Date: Mon Apr 14 04:14:11 2014
New Revision: 206157
URL: http://llvm.org/viewvc/llvm-project?rev=206157&view=rev
Log:
Format code around VCS conflict markers.
Now correctly formats:
{
int a;
void f() {
callme(some(parameter1,
<<<<<<< text by the vcs
parameter2),
||||||| text by the vcs
parameter2),
parameter3,
======= text by the vcs
parameter2, parameter3),
>>>>>>> text by the vcs
otherparameter);
}
}
Modified:
cfe/trunk/lib/Format/Format.cpp
cfe/trunk/lib/Format/FormatToken.h
cfe/trunk/lib/Format/UnwrappedLineParser.cpp
cfe/trunk/lib/Format/UnwrappedLineParser.h
cfe/trunk/unittests/Format/FormatTest.cpp
Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=206157&r1=206156&r2=206157&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Mon Apr 14 04:14:11 2014
@@ -1157,7 +1157,8 @@ public:
encoding::Encoding Encoding)
: FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
- IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
+ IdentTable(getFormattingLangOpts()), Encoding(Encoding),
+ FirstInLineIndex(0) {
Lex.SetKeepWhitespaceMode(true);
for (const std::string& ForEachMacro : Style.ForEachMacros)
@@ -1167,9 +1168,12 @@ public:
ArrayRef<FormatToken *> lex() {
assert(Tokens.empty());
+ assert(FirstInLineIndex == 0); // Like Tokens above, the line index must still be in its initial state.
do {
Tokens.push_back(getNextToken());
tryMergePreviousTokens();
+ if (Tokens.back()->NewlinesBefore > 0)
+ FirstInLineIndex = Tokens.size() - 1; // The newest token starts a new line; record its index (taken after any merging above).
} while (Tokens.back()->Tok.isNot(tok::eof));
return Tokens;
}
@@ -1180,6 +1184,8 @@ private:
void tryMergePreviousTokens() {
if (tryMerge_TMacro())
return;
+ if (tryMergeConflictMarkers())
+ return;
if (Style.Language == FormatStyle::LK_JavaScript) {
static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
@@ -1254,6 +1260,68 @@ private:
return true;
}
+ bool tryMergeConflictMarkers() { // Returns true if the line starting at Tokens[FirstInLineIndex] begins with a conflict marker and was collapsed into a single token.
+ if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
+ return false; // Only proceed once the newest token is preceded by a newline or is eof.
+
+ // Conflict lines look like:
+ // <marker> <text from the vcs>
+ // For example:
+ // >>>>>>> /file/in/file/system at revision 1234
+ //
+ // We merge all tokens in a line that starts with a conflict marker
+ // into a single token with a special token type that the unwrapped line
+ // parser will use to correctly rebuild the underlying code.
+
+ FileID ID;
+ // Get the file and the offset within it of the first token in the line (Tokens[FirstInLineIndex]).
+ unsigned FirstInLineOffset;
+ std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
+ Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
+ StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
+ // Calculate the offset of the start of that line: one past the preceding '\n', or 0 if there is none.
+ auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
+ if (LineOffset == StringRef::npos) {
+ LineOffset = 0;
+ } else {
+ ++LineOffset;
+ }
+
+ auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); // End of the first word on that line; a marker must therefore begin in column 0.
+ StringRef LineStart; // NOTE(review): with "\r\n" line endings a bare marker line would keep the '\r' here and fail the exact comparisons below — confirm.
+ if (FirstSpace == StringRef::npos) {
+ LineStart = Buffer.substr(LineOffset);
+ } else {
+ LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
+ }
+
+ TokenType Type = TT_Unknown; // Stays TT_Unknown when the line does not begin with a recognized marker.
+ if (LineStart == "<<<<<<<" || LineStart == ">>>>") { // Seven-character and four-character markers are both accepted; the four-character start/end are the reverse of the seven-character ones.
+ Type = TT_ConflictStart;
+ } else if (LineStart == "|||||||" || LineStart == "=======" ||
+ LineStart == "====") {
+ Type = TT_ConflictAlternative;
+ } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
+ Type = TT_ConflictEnd;
+ }
+
+ if (Type != TT_Unknown) {
+ FormatToken *Next = Tokens.back(); // The token that triggered this call; it is re-appended below. NOTE(review): if Tokens.size() == 1 here, Next is also Tokens[FirstInLineIndex] and would end up in Tokens twice — confirm this cannot happen.
+
+ Tokens.resize(FirstInLineIndex + 1); // Drop every token of the marker line except its first one.
+ // We do not need to build a complete token here, as we will skip it
+ // during parsing anyway (as we must not touch whitespace around conflict
+ // markers).
+ Tokens.back()->Type = Type;
+ Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
+
+ Tokens.push_back(Next);
+ return true;
+ }
+
+ return false;
+ }
+
FormatToken *getNextToken() {
if (GreaterStashed) {
// Create a synthesized second '>' token.
@@ -1401,6 +1469,8 @@ private:
IdentifierTable IdentTable;
encoding::Encoding Encoding;
llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
+ // Index (in 'Tokens') of the last token that starts a new line.
+ unsigned FirstInLineIndex;
SmallVector<FormatToken *, 16> Tokens;
SmallVector<IdentifierInfo*, 8> ForEachMacros;
Modified: cfe/trunk/lib/Format/FormatToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatToken.h?rev=206157&r1=206156&r2=206157&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatToken.h (original)
+++ cfe/trunk/lib/Format/FormatToken.h Mon Apr 14 04:14:11 2014
@@ -33,19 +33,22 @@ enum TokenType {
TT_BlockComment,
TT_CastRParen,
TT_ConditionalExpr,
+ TT_ConflictAlternative,
+ TT_ConflictEnd,
+ TT_ConflictStart,
TT_CtorInitializerColon,
TT_CtorInitializerComma,
TT_DesignatedInitializerPeriod,
TT_DictLiteral,
- TT_ImplicitStringLiteral,
- TT_InlineASMColon,
- TT_InheritanceColon,
TT_FunctionLBrace,
TT_FunctionTypeLParen,
+ TT_ImplicitStringLiteral,
+ TT_InheritanceColon,
+ TT_InlineASMColon,
TT_LambdaLSquare,
TT_LineComment,
- TT_ObjCBlockLParen,
TT_ObjCBlockLBrace,
+ TT_ObjCBlockLParen,
TT_ObjCDecl,
TT_ObjCForIn,
TT_ObjCMethodExpr,
Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=206157&r1=206156&r2=206157&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Mon Apr 14 04:14:11 2014
@@ -465,14 +465,14 @@ void UnwrappedLineParser::parsePPDirecti
}
}
-void UnwrappedLineParser::pushPPConditional() {
- if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
+void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { // Pushes the state of a newly entered branch onto PPStack.
+ if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) // A branch nested inside an unreachable branch is unreachable too.
PPStack.push_back(PP_Unreachable);
else
PPStack.push_back(PP_Conditional);
}
-void UnwrappedLineParser::parsePPIf(bool IfDef) {
+void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
++PPBranchLevel;
assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
@@ -480,37 +480,22 @@ void UnwrappedLineParser::parsePPIf(bool
PPLevelBranchCount.push_back(0);
}
PPChainBranchIndex.push(0);
- nextToken();
- bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
- StringRef(FormatTok->Tok.getLiteralData(),
- FormatTok->Tok.getLength()) == "0") ||
- FormatTok->Tok.is(tok::kw_false);
- if ((!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0) {
- PPStack.push_back(PP_Unreachable);
- } else {
- pushPPConditional();
- }
- parsePPUnknown();
+ bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
+ conditionalCompilationCondition(Unreachable || Skip);
}
-void UnwrappedLineParser::parsePPElse() {
+void UnwrappedLineParser::conditionalCompilationAlternative() { // Leaves the current branch of a conditional (pops PPStack) and enters its next alternative.
if (!PPStack.empty())
PPStack.pop_back();
assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
if (!PPChainBranchIndex.empty())
++PPChainBranchIndex.top();
- if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
- PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()) {
- PPStack.push_back(PP_Unreachable);
- } else {
- pushPPConditional();
- }
- parsePPUnknown();
+ conditionalCompilationCondition(
+ PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
+ PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); // The new branch is unreachable when its chain index differs from PPLevelBranchIndex[PPBranchLevel].
}
-void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
-
-void UnwrappedLineParser::parsePPEndIf() {
+void UnwrappedLineParser::conditionalCompilationEnd() {
assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
@@ -524,6 +509,27 @@ void UnwrappedLineParser::parsePPEndIf()
PPChainBranchIndex.pop();
if (!PPStack.empty())
PPStack.pop_back();
+}
+
+void UnwrappedLineParser::parsePPIf(bool IfDef) { // Opens a conditional for a preprocessor 'if'; IfDef presumably marks the '#ifdef'/'#ifndef' forms — confirm at the call site.
+ nextToken();
+ bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && // True when the current token is the literal "0" or the keyword 'false'.
+ StringRef(FormatTok->Tok.getLiteralData(),
+ FormatTok->Tok.getLength()) == "0") ||
+ FormatTok->Tok.is(tok::kw_false);
+ conditionalCompilationStart(!IfDef && IsLiteralFalse); // A literal-false condition makes the branch unreachable, unless IfDef is set.
+ parsePPUnknown();
+}
+
+void UnwrappedLineParser::parsePPElse() { // Switches to the next alternative of the current conditional, then calls parsePPUnknown().
+ conditionalCompilationAlternative();
+ parsePPUnknown();
+}
+
+void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } // Handled identically to parsePPElse().
+
+void UnwrappedLineParser::parsePPEndIf() { // Closes the current conditional, then calls parsePPUnknown().
+ conditionalCompilationEnd();
parsePPUnknown();
}
@@ -1406,6 +1412,19 @@ void UnwrappedLineParser::readToken() {
flushComments(isOnNewLine(*FormatTok));
parsePPDirective();
}
+ while (FormatTok->Type == TT_ConflictStart ||
+ FormatTok->Type == TT_ConflictEnd ||
+ FormatTok->Type == TT_ConflictAlternative) {
+ if (FormatTok->Type == TT_ConflictStart) {
+ conditionalCompilationStart(/*Unreachable=*/false);
+ } else if (FormatTok->Type == TT_ConflictAlternative) {
+ conditionalCompilationAlternative();
+ } else if(FormatTok->Type == TT_ConflictEnd) {
+ conditionalCompilationEnd();
+ }
+ FormatTok = Tokens->getNextToken();
+ FormatTok->MustBreakBefore = true;
+ }
if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
!Line->InPPDirective) {
Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=206157&r1=206156&r2=206157&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.h Mon Apr 14 04:14:11 2014
@@ -107,7 +107,16 @@ private:
void flushComments(bool NewlineBeforeNext);
void pushToken(FormatToken *Tok);
void calculateBraceTypes();
- void pushPPConditional();
+
+ // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
+ // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
+ // this branch either cannot be taken (for example '#if false'), or should
+ // not be taken in this round.
+ void conditionalCompilationCondition(bool Unreachable); // Pushes PP_Unreachable or PP_Conditional onto PPStack.
+ void conditionalCompilationStart(bool Unreachable); // Opens a new conditional level, then records the state of its first branch.
+ void conditionalCompilationAlternative(); // Leaves the current branch and enters the next alternative of the same conditional.
+ void conditionalCompilationEnd(); // Closes the current conditional.
+
bool isOnNewLine(const FormatToken& FormatTok);
// FIXME: We are constantly running into bugs where Line.Level is incorrectly
Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=206157&r1=206156&r2=206157&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Mon Apr 14 04:14:11 2014
@@ -8402,5 +8402,97 @@ TEST_F(FormatTest, HandleUnbalancedImpli
EXPECT_EQ(code, format(code));
}
+TEST_F(FormatTest, HandleConflictMarkers) { // Checks that code between VCS conflict markers is formatted while the marker lines themselves are kept verbatim.
+ // Git/SVN conflict markers; the last alternative gets reflowed ("parameter2, parameter3)" is joined onto one line).
+ EXPECT_EQ("int a;\n"
+ "void f() {\n"
+ " callme(some(parameter1,\n"
+ "<<<<<<< text by the vcs\n"
+ " parameter2),\n"
+ "||||||| text by the vcs\n"
+ " parameter2),\n"
+ " parameter3,\n"
+ "======= text by the vcs\n"
+ " parameter2, parameter3),\n"
+ ">>>>>>> text by the vcs\n"
+ " otherparameter);\n",
+ format("int a;\n"
+ "void f() {\n"
+ " callme(some(parameter1,\n"
+ "<<<<<<< text by the vcs\n"
+ " parameter2),\n"
+ "||||||| text by the vcs\n"
+ " parameter2),\n"
+ " parameter3,\n"
+ "======= text by the vcs\n"
+ " parameter2,\n"
+ " parameter3),\n"
+ ">>>>>>> text by the vcs\n"
+ " otherparameter);\n"));
+
+ // Perforce markers: four characters long, opening with ">>>>" and closing with "<<<<".
+ EXPECT_EQ("void f() {\n"
+ " function(\n"
+ ">>>> text by the vcs\n"
+ " parameter,\n"
+ "==== text by the vcs\n"
+ " parameter,\n"
+ "==== text by the vcs\n"
+ " parameter,\n"
+ "<<<< text by the vcs\n"
+ " parameter);\n",
+ format("void f() {\n"
+ " function(\n"
+ ">>>> text by the vcs\n"
+ " parameter,\n"
+ "==== text by the vcs\n"
+ " parameter,\n"
+ "==== text by the vcs\n"
+ " parameter,\n"
+ "<<<< text by the vcs\n"
+ " parameter);\n"));
+ // Bare markers with no code between them (and no trailing newline) are left unchanged.
+ EXPECT_EQ("<<<<<<<\n"
+ "|||||||\n"
+ "=======\n"
+ ">>>>>>>",
+ format("<<<<<<<\n"
+ "|||||||\n"
+ "=======\n"
+ ">>>>>>>"));
+ // A declaration inside a single alternative is left unchanged as well.
+ EXPECT_EQ("<<<<<<<\n"
+ "|||||||\n"
+ "int i;\n"
+ "=======\n"
+ ">>>>>>>",
+ format("<<<<<<<\n"
+ "|||||||\n"
+ "int i;\n"
+ "=======\n"
+ ">>>>>>>"));
+
+ // FIXME: Handle parsing of macros around conflict markers correctly:
+ EXPECT_EQ("#define Macro \\\n"
+ "<<<<<<<\n"
+ "Something \\\n"
+ "|||||||\n"
+ "Else \\\n"
+ "=======\n"
+ "Other \\\n"
+ ">>>>>>>\n"
+ "End int i;\n",
+ format("#define Macro \\\n"
+ "<<<<<<<\n"
+ " Something \\\n"
+ "|||||||\n"
+ " Else \\\n"
+ "=======\n"
+ " Other \\\n"
+ ">>>>>>>\n"
+ " End\n"
+ "int i;\n"));
+}
+
} // end namespace tooling
} // end namespace clang
More information about the cfe-commits
mailing list