r190804 - When in pre-c++11 mode, treat _T("xxx") as a single string literal, repeat the _T() part around each fragment. This addresses http://llvm.org/PR17122
Alexander Kornienko
alexfh at google.com
Mon Sep 16 13:20:49 PDT 2013
Author: alexfh
Date: Mon Sep 16 15:20:49 2013
New Revision: 190804
URL: http://llvm.org/viewvc/llvm-project?rev=190804&view=rev
Log:
When in pre-c++11 mode, treat _T("xxx") as a single string literal, repeat the _T() part around each fragment. This addresses http://llvm.org/PR17122
Reviewers: djasper
Reviewed By: djasper
CC: cfe-commits, klimek, rsmith
Differential Revision: http://llvm-reviews.chandlerc.com/D1640
Modified:
cfe/trunk/lib/Format/BreakableToken.cpp
cfe/trunk/lib/Format/BreakableToken.h
cfe/trunk/lib/Format/ContinuationIndenter.cpp
cfe/trunk/lib/Format/Format.cpp
cfe/trunk/lib/Format/WhitespaceManager.cpp
cfe/trunk/unittests/Format/FormatTest.cpp
Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=190804&r1=190803&r2=190804&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Mon Sep 16 15:20:49 2013
@@ -82,20 +82,18 @@ static BreakableToken::Split getCommentS
}
static BreakableToken::Split getStringSplit(StringRef Text,
- unsigned ContentStartColumn,
+ unsigned UsedColumns,
unsigned ColumnLimit,
unsigned TabWidth,
encoding::Encoding Encoding) {
// FIXME: Reduce unit test case.
if (Text.empty())
return BreakableToken::Split(StringRef::npos, 0);
- if (ColumnLimit <= ContentStartColumn)
+ if (ColumnLimit <= UsedColumns)
return BreakableToken::Split(StringRef::npos, 0);
- unsigned MaxSplit =
- std::min<unsigned>(ColumnLimit - ContentStartColumn,
- encoding::columnWidthWithTabs(Text, ContentStartColumn,
- TabWidth, Encoding) -
- 1);
+ unsigned MaxSplit = std::min<unsigned>(
+ ColumnLimit - UsedColumns,
+ encoding::columnWidthWithTabs(Text, UsedColumns, TabWidth, Encoding) - 1);
StringRef::size_type SpaceOffset = 0;
StringRef::size_type SlashOffset = 0;
StringRef::size_type WordStartOffset = 0;
@@ -107,9 +105,8 @@ static BreakableToken::Split getStringSp
Chars += Advance;
} else {
Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
- Chars += encoding::columnWidthWithTabs(Text.substr(0, Advance),
- ContentStartColumn + Chars,
- TabWidth, Encoding);
+ Chars += encoding::columnWidthWithTabs(
+ Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
}
if (Chars > MaxSplit)
@@ -158,19 +155,19 @@ BreakableSingleLineToken::BreakableSingl
Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
}
-BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
- unsigned StartColumn,
- bool InPPDirective,
- encoding::Encoding Encoding,
- const FormatStyle &Style)
- : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
+BreakableStringLiteral::BreakableStringLiteral(
+ const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
+ StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,
Encoding, Style) {}
BreakableToken::Split
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const {
- return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit,
- Style.TabWidth, Encoding);
+ return getStringSplit(Line.substr(TailOffset),
+ StartColumn + Prefix.size() + Postfix.size(),
+ ColumnLimit, Style.TabWidth, Encoding);
}
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
Modified: cfe/trunk/lib/Format/BreakableToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.h?rev=190804&r1=190803&r2=190804&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.h (original)
+++ cfe/trunk/lib/Format/BreakableToken.h Mon Sep 16 15:20:49 2013
@@ -110,6 +110,7 @@ public:
/// \p StartColumn specifies the column in which the token will start
/// after formatting.
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
+ StringRef Prefix, StringRef Postfix,
bool InPPDirective, encoding::Encoding Encoding,
const FormatStyle &Style);
Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=190804&r1=190803&r2=190804&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Mon Sep 16 15:20:49 2013
@@ -633,6 +633,25 @@ unsigned ContinuationIndenter::addMultil
return 0;
}
+static bool getRawStringLiteralPrefixPostfix(StringRef Text,
+ StringRef &Prefix,
+ StringRef &Postfix) {
+ if (Text.startswith(Prefix = "R\"") || Text.startswith(Prefix = "uR\"") ||
+ Text.startswith(Prefix = "UR\"") || Text.startswith(Prefix = "u8R\"") ||
+ Text.startswith(Prefix = "LR\"")) {
+ size_t ParenPos = Text.find('(');
+ if (ParenPos != StringRef::npos) {
+ StringRef Delimiter =
+ Text.substr(Prefix.size(), ParenPos - Prefix.size());
+ Prefix = Text.substr(0, ParenPos + 1);
+ Postfix = Text.substr(Text.size() - 2 - Delimiter.size());
+ return Postfix.front() == ')' && Postfix.back() == '"' &&
+ Postfix.substr(1).startswith(Delimiter);
+ }
+ }
+ return false;
+}
+
unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
LineState &State,
bool DryRun) {
@@ -641,24 +660,42 @@ unsigned ContinuationIndenter::breakProt
if (Current.Type != TT_BlockComment && Current.IsMultiline)
return addMultilineToken(Current, State);
- if (!Current.isOneOf(tok::string_literal, tok::comment))
+ if (!Current.isOneOf(tok::string_literal, tok::wide_string_literal,
+ tok::utf8_string_literal, tok::utf16_string_literal,
+ tok::utf32_string_literal, tok::comment))
return 0;
llvm::OwningPtr<BreakableToken> Token;
unsigned StartColumn = State.Column - Current.ColumnWidth;
- if (Current.is(tok::string_literal) &&
+ if (Current.isOneOf(tok::string_literal, tok::wide_string_literal,
+ tok::utf8_string_literal, tok::utf16_string_literal,
+ tok::utf32_string_literal) &&
Current.Type != TT_ImplicitStringLiteral) {
- // Only break up default narrow strings.
- if (!Current.TokenText.startswith("\""))
- return 0;
// Exempts unterminated string literals from line breaking. The user will
// likely want to terminate the string before any line breaking is done.
if (Current.IsUnterminatedLiteral)
return 0;
- Token.reset(new BreakableStringLiteral(
- Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
+ StringRef Text = Current.TokenText;
+ StringRef Prefix;
+ StringRef Postfix;
+ // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
+ // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
+ // reduce the overhead) for each FormatToken, which is a string, so that we
+ // don't run multiple checks here on the hot path.
+ if ((Text.endswith(Postfix = "\"") &&
+ (Text.startswith(Prefix = "\"") || Text.startswith(Prefix = "u\"") ||
+ Text.startswith(Prefix = "U\"") || Text.startswith(Prefix = "u8\"") ||
+ Text.startswith(Prefix = "L\""))) ||
+ (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")")) ||
+ getRawStringLiteralPrefixPostfix(Text, Prefix, Postfix)) {
+ Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
+ Postfix, State.Line->InPPDirective,
+ Encoding, Style));
+ } else {
+ return 0;
+ }
} else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
Token.reset(new BreakableBlockComment(
Current, StartColumn, Current.OriginalColumn, !Current.Previous,
Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=190804&r1=190803&r2=190804&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Mon Sep 16 15:20:49 2013
@@ -591,6 +591,7 @@ public:
assert(Tokens.empty());
do {
Tokens.push_back(getNextToken());
+ maybeJoinPreviousTokens();
} while (Tokens.back()->Tok.isNot(tok::eof));
return Tokens;
}
@@ -598,6 +599,40 @@ public:
IdentifierTable &getIdentTable() { return IdentTable; }
private:
+ void maybeJoinPreviousTokens() {
+ if (Tokens.size() < 4)
+ return;
+ FormatToken *Last = Tokens.back();
+ if (!Last->is(tok::r_paren))
+ return;
+
+ FormatToken *String = Tokens[Tokens.size() - 2];
+ if (!String->is(tok::string_literal) || String->IsMultiline)
+ return;
+
+ if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
+ return;
+
+ FormatToken *Macro = Tokens[Tokens.size() - 4];
+ if (Macro->TokenText != "_T")
+ return;
+
+ const char *Start = Macro->TokenText.data();
+ const char *End = Last->TokenText.data() + Last->TokenText.size();
+ String->TokenText = StringRef(Start, End - Start);
+ String->IsFirst = Macro->IsFirst;
+ String->LastNewlineOffset = Macro->LastNewlineOffset;
+ String->WhitespaceRange = Macro->WhitespaceRange;
+ String->OriginalColumn = Macro->OriginalColumn;
+ String->ColumnWidth = encoding::columnWidthWithTabs(
+ String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
+
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.back() = String;
+ }
+
FormatToken *getNextToken() {
if (GreaterStashed) {
// Create a synthesized second '>' token.
@@ -1135,7 +1170,8 @@ private:
CharSourceRange LineRange = CharSourceRange::getCharRange(
First->WhitespaceRange.getBegin().getLocWithOffset(
First->LastNewlineOffset),
- Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1));
+ Last->getStartOfNonWhitespace().getLocWithOffset(
+ Last->TokenText.size() - 1));
return touchesRanges(LineRange);
}
Modified: cfe/trunk/lib/Format/WhitespaceManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/WhitespaceManager.cpp?rev=190804&r1=190803&r2=190804&view=diff
==============================================================================
--- cfe/trunk/lib/Format/WhitespaceManager.cpp (original)
+++ cfe/trunk/lib/Format/WhitespaceManager.cpp Mon Sep 16 15:20:49 2013
@@ -49,11 +49,9 @@ void WhitespaceManager::replaceWhitespac
void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
bool InPPDirective) {
- Changes.push_back(
- Change(false, Tok.WhitespaceRange, /*Spaces=*/0,
- SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1,
- Tok.NewlinesBefore, "", "", Tok.Tok.getKind(),
- InPPDirective && !Tok.IsFirst));
+ Changes.push_back(Change(false, Tok.WhitespaceRange, /*Spaces=*/0,
+ Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
+ Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst));
}
void WhitespaceManager::replaceWhitespaceInToken(
Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=190804&r1=190803&r2=190804&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Mon Sep 16 15:20:49 2013
@@ -5468,6 +5468,66 @@ TEST_F(FormatTest, BreakStringLiterals)
format("#define A \"some text other\";", AlignLeft));
}
+TEST_F(FormatTest, BreaksWideStringLiterals) {
+ EXPECT_EQ(
+ "u8\"utf8 string \"\n"
+ "u8\"literal\";",
+ format("u8\"utf8 string literal\";", getGoogleStyleWithColumns(16)));
+ EXPECT_EQ(
+ "u\"utf16 string \"\n"
+ "u\"literal\";",
+ format("u\"utf16 string literal\";", getGoogleStyleWithColumns(16)));
+ EXPECT_EQ(
+ "U\"utf32 string \"\n"
+ "U\"literal\";",
+ format("U\"utf32 string literal\";", getGoogleStyleWithColumns(16)));
+ EXPECT_EQ("L\"wide string \"\n"
+ "L\"literal\";",
+ format("L\"wide string literal\";", getGoogleStyleWithColumns(16)));
+}
+
+TEST_F(FormatTest, BreaksRawStringLiterals) {
+ EXPECT_EQ("R\"x(raw )x\"\n"
+ "R\"x(literal)x\";",
+ format("R\"x(raw literal)x\";", getGoogleStyleWithColumns(15)));
+ EXPECT_EQ("uR\"x(raw )x\"\n"
+ "uR\"x(literal)x\";",
+ format("uR\"x(raw literal)x\";", getGoogleStyleWithColumns(16)));
+ EXPECT_EQ("u8R\"x(raw )x\"\n"
+ "u8R\"x(literal)x\";",
+ format("u8R\"x(raw literal)x\";", getGoogleStyleWithColumns(17)));
+ EXPECT_EQ("LR\"x(raw )x\"\n"
+ "LR\"x(literal)x\";",
+ format("LR\"x(raw literal)x\";", getGoogleStyleWithColumns(16)));
+ EXPECT_EQ("UR\"x(raw )x\"\n"
+ "UR\"x(literal)x\";",
+ format("UR\"x(raw literal)x\";", getGoogleStyleWithColumns(16)));
+}
+
+TEST_F(FormatTest, BreaksStringLiteralsWithin_TMacro) {
+ FormatStyle Style = getLLVMStyleWithColumns(20);
+ EXPECT_EQ(
+ "_T(\"aaaaaaaaaaaaaa\")\n"
+ "_T(\"aaaaaaaaaaaaaa\")\n"
+ "_T(\"aaaaaaaaaaaa\")",
+ format(" _T(\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\")", Style));
+ EXPECT_EQ("f(x, _T(\"aaaaaaaaa\")\n"
+ " _T(\"aaaaaa\"),\n"
+ " z);",
+ format("f(x, _T(\"aaaaaaaaaaaaaaa\"), z);", Style));
+
+ // FIXME: Handle embedded spaces in one iteration.
+ // EXPECT_EQ("_T(\"aaaaaaaaaaaaa\")\n"
+ // "_T(\"aaaaaaaaaaaaa\")\n"
+ // "_T(\"aaaaaaaaaaaaa\")\n"
+ // "_T(\"a\")",
+ // format(" _T ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )",
+ // getLLVMStyleWithColumns(20)));
+ EXPECT_EQ(
+ "_T ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )",
+ format(" _T ( \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" )", Style));
+}
+
TEST_F(FormatTest, DontSplitStringLiteralsWithEscapedNewlines) {
EXPECT_EQ(
"aaaaaaaaaaa = \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n"
@@ -5519,17 +5579,6 @@ TEST_F(FormatTest, CountsCharactersInMul
}
TEST_F(FormatTest, SkipsUnknownStringLiterals) {
- EXPECT_EQ(
- "u8\"unsupported literal\";",
- format("u8\"unsupported literal\";", getGoogleStyleWithColumns(15)));
- EXPECT_EQ("u\"unsupported literal\";",
- format("u\"unsupported literal\";", getGoogleStyleWithColumns(15)));
- EXPECT_EQ("U\"unsupported literal\";",
- format("U\"unsupported literal\";", getGoogleStyleWithColumns(15)));
- EXPECT_EQ("L\"unsupported literal\";",
- format("L\"unsupported literal\";", getGoogleStyleWithColumns(15)));
- EXPECT_EQ("R\"x(raw literal)x\";",
- format("R\"x(raw literal)x\";", getGoogleStyleWithColumns(15)));
verifyFormat("string a = \"unterminated;");
EXPECT_EQ("function(\"unterminated,\n"
" OtherParameter);",
@@ -5626,7 +5675,10 @@ TEST_F(FormatTest, DoNotBreakStringLiter
"\"00000000\"\n"
"\"1\"",
format("\"test\\000000000001\"", getLLVMStyleWithColumns(10)));
- EXPECT_EQ("R\"(\\x\\x00)\"\n",
+ // FIXME: We probably don't need to care about escape sequences in raw
+ // literals.
+ EXPECT_EQ("R\"(\\x)\"\n"
+ "R\"(\\x00)\"\n",
format("R\"(\\x\\x00)\"\n", getGoogleStyleWithColumns(7)));
}
More information about the cfe-commits
mailing list