r194576 - Correctly mark first token in the presence of UTF-8 BOM.
Alexander Kornienko
alexfh at google.com
Wed Nov 13 06:04:17 PST 2013
Author: alexfh
Date: Wed Nov 13 08:04:17 2013
New Revision: 194576
URL: http://llvm.org/viewvc/llvm-project?rev=194576&view=rev
Log:
Correctly mark first token in the presence of UTF-8 BOM.
Summary: Fixes http://llvm.org/PR17753
Reviewers: klimek
Reviewed By: klimek
CC: cfe-commits, klimek
Differential Revision: http://llvm-reviews.chandlerc.com/D2159
Modified:
cfe/trunk/lib/Format/Format.cpp
cfe/trunk/unittests/Format/FormatTest.cpp
Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=194576&r1=194575&r2=194576&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Wed Nov 13 08:04:17 2013
@@ -996,7 +996,7 @@ class FormatTokenLexer {
public:
FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
encoding::Encoding Encoding)
- : FormatTok(NULL), GreaterStashed(false), Column(0),
+ : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
Lex.SetKeepWhitespaceMode(true);
@@ -1069,8 +1069,8 @@ private:
readRawToken(*FormatTok);
SourceLocation WhitespaceStart =
FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
- if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
- FormatTok->IsFirst = true;
+ FormatTok->IsFirst = IsFirstToken;
+ IsFirstToken = false;
// Consume and record whitespace until we find a significant token.
unsigned WhitespaceLength = TrailingWhitespace;
@@ -1181,6 +1181,7 @@ private:
}
FormatToken *FormatTok;
+ bool IsFirstToken;
bool GreaterStashed;
unsigned Column;
unsigned TrailingWhitespace;
Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=194576&r1=194575&r2=194576&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Nov 13 08:04:17 2013
@@ -6911,6 +6911,14 @@ TEST_F(FormatTest, WorksFor8bitEncodings
getLLVMStyleWithColumns(12)));
}
+TEST_F(FormatTest, HandlesUTF8BOM) {
+ EXPECT_EQ("\xef\xbb\xbf", format("\xef\xbb\xbf"));
+ EXPECT_EQ("\xef\xbb\xbf#include <iostream>",
+ format("\xef\xbb\xbf#include <iostream>"));
+ EXPECT_EQ("\xef\xbb\xbf\n#include <iostream>",
+ format("\xef\xbb\xbf\n#include <iostream>"));
+}
+
// FIXME: Encode Cyrillic and CJK characters below to appease MS compilers.
#if !defined(_MSC_VER)
More information about the cfe-commits mailing list