[PATCH] Correctly mark first token in the presence of UTF-8 BOM.

Alexander Kornienko alexfh at google.com
Tue Nov 12 17:42:36 PST 2013


Hi klimek,

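Fixes http://llvm.org/PR17753: the first token is now tracked with an explicit
IsFirstToken flag in FormatTokenLexer instead of testing whether its leading
whitespace starts at file offset 0, a test that fails when the file begins
with a UTF-8 BOM.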

http://llvm-reviews.chandlerc.com/D2159

Files:
  .gitignore
  lib/Format/Format.cpp
  unittests/Format/FormatTest.cpp

Index: .gitignore
===================================================================
--- .gitignore
+++ .gitignore
@@ -29,7 +29,7 @@
 # Directories to ignore (do not add trailing '/'s, they skip symlinks).
 #==============================================================================#
 # Clang extra user tools, which is tracked independently (clang-tools-extra).
-tools/extra
+# !!! alexfh: track tools/extra in the same git directory tools/extra
 # Sphinx build products
 docs/_build
 docs/analyzer/_build
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -996,7 +996,7 @@
 public:
   FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
                    encoding::Encoding Encoding)
-      : FormatTok(NULL), GreaterStashed(false), Column(0),
+      : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
         TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
         IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
     Lex.SetKeepWhitespaceMode(true);
@@ -1069,8 +1069,8 @@
     readRawToken(*FormatTok);
     SourceLocation WhitespaceStart =
         FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
-    if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
-      FormatTok->IsFirst = true;
+    FormatTok->IsFirst = IsFirstToken;
+    IsFirstToken = false;
 
     // Consume and record whitespace until we find a significant token.
     unsigned WhitespaceLength = TrailingWhitespace;
@@ -1181,6 +1181,7 @@
   }
 
   FormatToken *FormatTok;
+  bool IsFirstToken;
   bool GreaterStashed;
   unsigned Column;
   unsigned TrailingWhitespace;
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -6911,6 +6911,14 @@
                    getLLVMStyleWithColumns(12)));
 }
 
+TEST_F(FormatTest, HandlesUTF8BOM) {
+  EXPECT_EQ("\xef\xbb\xbf", format("\xef\xbb\xbf"));
+  EXPECT_EQ("\xef\xbb\xbf#include <iostream>",
+            format("\xef\xbb\xbf#include <iostream>"));
+  EXPECT_EQ("\xef\xbb\xbf\n#include <iostream>",
+            format("\xef\xbb\xbf\n#include <iostream>"));
+}
+
 // FIXME: Encode Cyrillic and CJK characters below to appease MS compilers.
 #if !defined(_MSC_VER)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2159.1.patch
Type: text/x-patch
Size: 2419 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20131112/4280e6ca/attachment.bin>


More information about the cfe-commits mailing list