[cfe-commits] r129174 - in /cfe/trunk/lib: Basic/SourceManager.cpp Lex/Lexer.cpp
Eric Christopher
echristo at apple.com
Fri Apr 8 17:01:04 PDT 2011
Author: echristo
Date: Fri Apr 8 19:01:04 2011
New Revision: 129174
URL: http://llvm.org/viewvc/llvm-project?rev=129174&view=rev
Log:
Eat the UTF-8 BOM at the beginning of a file since it's ignored anyhow.
Nom Nom Nom.
Patch by Anton Korobeynikov!
Modified:
cfe/trunk/lib/Basic/SourceManager.cpp
cfe/trunk/lib/Lex/Lexer.cpp
Modified: cfe/trunk/lib/Basic/SourceManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/SourceManager.cpp?rev=129174&r1=129173&r2=129174&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/SourceManager.cpp (original)
+++ cfe/trunk/lib/Basic/SourceManager.cpp Fri Apr 8 19:01:04 2011
@@ -126,13 +126,12 @@
if (Invalid) *Invalid = true;
return Buffer.getPointer();
}
-
+
// If the buffer is valid, check to see if it has a UTF Byte Order Mark
- // (BOM). We only support UTF-8 without a BOM right now. See
+ // (BOM). We only support UTF-8, with or without a BOM, right now. See
// http://en.wikipedia.org/wiki/Byte_order_mark for more information.
llvm::StringRef BufStr = Buffer.getPointer()->getBuffer();
- const char *BOM = llvm::StringSwitch<const char *>(BufStr)
- .StartsWith("\xEF\xBB\xBF", "UTF-8")
+ const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)
.StartsWith("\xFE\xFF", "UTF-16 (BE)")
.StartsWith("\xFF\xFE", "UTF-16 (LE)")
.StartsWith("\x00\x00\xFE\xFF", "UTF-32 (BE)")
@@ -145,9 +144,9 @@
.StartsWith("\x84\x31\x95\x33", "GB-18030")
.Default(0);
- if (BOM) {
+ if (InvalidBOM) {
Diag.Report(Loc, diag::err_unsupported_bom)
- << BOM << ContentsEntry->getName();
+ << InvalidBOM << ContentsEntry->getName();
Buffer.setInt(Buffer.getInt() | InvalidFlag);
}
Modified: cfe/trunk/lib/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=129174&r1=129173&r2=129174&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Lexer.cpp (original)
+++ cfe/trunk/lib/Lex/Lexer.cpp Fri Apr 8 19:01:04 2011
@@ -71,9 +71,22 @@
"We assume that the input buffer has a null character at the end"
" to simplify lexing!");
+ // Check whether the buffer begins with a BOM and, if so, handle it. Right now
+ // we support only UTF-8 (with or without a BOM), so simply skip the UTF-8 BOM
+ // when it is present.
+ if (BufferStart == BufferPtr) {
+ // Determine the size of the BOM.
+ size_t BOMLength = llvm::StringSwitch<size_t>(BufferStart)
+ .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
+ .Default(0);
+
+ // Skip the BOM.
+ BufferPtr += BOMLength;
+ }
+
Is_PragmaLexer = false;
IsInConflictMarker = false;
-
+
// Start of the file is a start of line.
IsAtStartOfLine = true;
More information about the cfe-commits
mailing list