[cfe-commits] r101924 - in /cfe/trunk: include/clang/Basic/DiagnosticCommonKinds.td lib/Basic/SourceManager.cpp test/Lexer/utf-16.c test/Lexer/utf-16.c.txt
Chris Lattner
sabre at nondot.org
Tue Apr 20 11:14:03 PDT 2010
Author: lattner
Date: Tue Apr 20 13:14:03 2010
New Revision: 101924
URL: http://llvm.org/viewvc/llvm-project?rev=101924&view=rev
Log:
Enhance SourceManager to detect various UTF BOMs and emit a fatal error
about them instead of producing tons of garbage from the lexer.
It would be even better for sourcemgr to dynamically transcode (e.g.
from UTF16 -> UTF8).
Added:
cfe/trunk/test/Lexer/utf-16.c
cfe/trunk/test/Lexer/utf-16.c.txt (with props)
Modified:
cfe/trunk/include/clang/Basic/DiagnosticCommonKinds.td
cfe/trunk/lib/Basic/SourceManager.cpp
Modified: cfe/trunk/include/clang/Basic/DiagnosticCommonKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticCommonKinds.td?rev=101924&r1=101923&r2=101924&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticCommonKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticCommonKinds.td Tue Apr 20 13:14:03 2010
@@ -72,5 +72,6 @@
def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
def err_file_modified : Error<
"file '%0' modified since it was first processed">, DefaultFatal;
-
+def err_unsupported_bom : Error<"%0 byte order mark detected in '%1', but "
+ "encoding is not supported">, DefaultFatal;
}
Modified: cfe/trunk/lib/Basic/SourceManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/SourceManager.cpp?rev=101924&r1=101923&r2=101924&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/SourceManager.cpp (original)
+++ cfe/trunk/lib/Basic/SourceManager.cpp Tue Apr 20 13:14:03 2010
@@ -119,6 +119,41 @@
Buffer.setInt(true);
#endif
}
+
+ // If the buffer is valid, check to see if it starts with a Unicode Byte
+ // Order Mark (BOM). We only support UTF-8 without a BOM right now, so any
+ // recognized signature is reported via err_unsupported_bom and the buffer
+ // is flagged as invalid. See http://en.wikipedia.org/wiki/Byte_order_mark
+ // for the list of signatures.
+ //
+ // Ordering matters: the UTF-32 (LE) signature FF FE 00 00 begins with the
+ // UTF-16 (LE) signature FF FE, so the UTF-32 checks must come before the
+ // UTF-16 ones or UTF-32 (LE) input would be misreported as UTF-16 (LE).
+ // The UTF-32 patterns contain NUL bytes, hence the explicit lengths.
+ if (!Buffer.getInt()) {
+   llvm::StringRef BufStr = Buffer.getPointer()->getBuffer();
+   const char *BOM = 0;
+   if (BufStr.startswith("\xEF\xBB\xBF"))
+     BOM = "UTF-8";
+   else if (BufStr.startswith(llvm::StringRef("\x00\x00\xFE\xFF", 4)))
+     BOM = "UTF-32 (BE)";
+   else if (BufStr.startswith(llvm::StringRef("\xFF\xFE\x00\x00", 4)))
+     BOM = "UTF-32 (LE)";
+   else if (BufStr.startswith("\xFE\xFF"))
+     BOM = "UTF-16 (BE)";
+   else if (BufStr.startswith("\xFF\xFE"))
+     BOM = "UTF-16 (LE)";
+   else if (BufStr.startswith("\x2B\x2F\x76"))
+     BOM = "UTF-7";
+   else if (BufStr.startswith("\xF7\x64\x4C"))
+     BOM = "UTF-1";
+   else if (BufStr.startswith("\xDD\x73\x66\x73"))
+     BOM = "UTF-EBCDIC";
+   else if (BufStr.startswith("\x0E\xFE\xFF"))
+     BOM = "SCSU";
+   else if (BufStr.startswith("\xFB\xEE\x28"))
+     BOM = "BOCU-1";
+   else if (BufStr.startswith("\x84\x31\x95\x33"))
+     BOM = "GB-18030";
+
+   if (BOM) {
+     // Name the detected encoding and the offending file in the diagnostic,
+     // then mark the buffer invalid, as the failure path above does.
+     Diag.Report(diag::err_unsupported_bom) << BOM << Entry->getName();
+     Buffer.setInt(true);
+   }
+ }
}
if (Invalid)
Added: cfe/trunk/test/Lexer/utf-16.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/utf-16.c?rev=101924&view=auto
==============================================================================
--- cfe/trunk/test/Lexer/utf-16.c (added)
+++ cfe/trunk/test/Lexer/utf-16.c Tue Apr 20 13:14:03 2010
@@ -0,0 +1,4 @@
+// RUN: not %clang -xc %s.txt -fsyntax-only 2>&1 | grep 'UTF-16 (LE) byte order mark detected'
+// rdar://7876588
+
+// This test verifies that clang gives a decent error for UTF-16 source files.
\ No newline at end of file
Added: cfe/trunk/test/Lexer/utf-16.c.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/utf-16.c.txt?rev=101924&view=auto
==============================================================================
Binary file - no diff available.
Propchange: cfe/trunk/test/Lexer/utf-16.c.txt
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
More information about the cfe-commits
mailing list