[cfe-commits] r166900 - in /cfe/trunk: lib/Lex/LiteralSupport.cpp test/Misc/wrong-encoding.c
Seth Cantrell
seth.cantrell at gmail.com
Sun Oct 28 11:24:46 PDT 2012
Author: socantre
Date: Sun Oct 28 13:24:46 2012
New Revision: 166900
URL: http://llvm.org/viewvc/llvm-project?rev=166900&view=rev
Log:
Improve highlighting of invalid string encodings.
Limit the highlight to exactly the bad encoding, and highlight every
bad encoding in a string.
Modified:
cfe/trunk/lib/Lex/LiteralSupport.cpp
cfe/trunk/test/Misc/wrong-encoding.c
Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=166900&r1=166899&r2=166900&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
+++ cfe/trunk/lib/Lex/LiteralSupport.cpp Sun Oct 28 13:24:46 2012
@@ -49,6 +49,20 @@
}
}
+static CharSourceRange MakeCharSourceRange(const LangOptions &Features,
+ FullSourceLoc TokLoc,
+ const char *TokBegin,
+ const char *TokRangeBegin,
+ const char *TokRangeEnd) {
+ SourceLocation Begin =
+ Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
+ TokLoc.getManager(), Features);
+ SourceLocation End =
+ Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
+ TokLoc.getManager(), Features);
+ return CharSourceRange::getCharRange(Begin, End);
+}
+
/// \brief Produce a diagnostic highlighting some portion of a literal.
///
/// Emits the diagnostic \p DiagID, highlighting the range of characters from
@@ -61,11 +75,8 @@
SourceLocation Begin =
Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
TokLoc.getManager(), Features);
- SourceLocation End =
- Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
- TokLoc.getManager(), Features);
- return Diags->Report(Begin, DiagID)
- << CharSourceRange::getCharRange(Begin, End);
+ return Diags->Report(Begin, DiagID) <<
+ MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
}
/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
@@ -1372,6 +1383,15 @@
}
}
+static const char *resync_utf8(const char *err, const char *end) {
+ if (err==end)
+ return end;
+ end = err + std::min<unsigned>(getNumBytesForUTF8(*err), end-err);
+ while (++err!=end && (*err&0xC0)==0x80)
+ ;
+ return err;
+}
+
/// \brief This function copies from Fragment, which is a sequence of bytes
/// within Tok's contents (which begin at TokBegin) into ResultPtr.
/// Performs widening for multi-byte characters.
@@ -1381,7 +1401,6 @@
const UTF8 *ErrorPtrTmp;
if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
return false;
- const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
// If we see bad encoding for unprefixed string literals, warn and
// simply copy the byte values, for compatibility with gcc and older
@@ -1391,12 +1410,31 @@
memcpy(ResultPtr, Fragment.data(), Fragment.size());
ResultPtr += Fragment.size();
}
+
if (Diags) {
- Diag(Diags, Features, FullSourceLoc(Tok.getLocation(), SM), TokBegin,
- ErrorPtr, ErrorPtr + std::min<unsigned>(getNumBytesForUTF8(*ErrorPtr),
- Fragment.end() - ErrorPtr),
- NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
- : diag::err_bad_string_encoding);
+ const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
+
+ FullSourceLoc SourceLoc(Tok.getLocation(), SM);
+ const DiagnosticBuilder &Builder =
+ Diag(Diags, Features, SourceLoc, TokBegin,
+ ErrorPtr, resync_utf8(ErrorPtr, Fragment.end()),
+ NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
+ : diag::err_bad_string_encoding);
+
+ char *SavedResultPtr = ResultPtr;
+ const char *NextStart = resync_utf8(ErrorPtr, Fragment.end());
+ StringRef NextFragment(NextStart, Fragment.end()-NextStart);
+
+ while (!ConvertUTF8toWide(CharByteWidth, NextFragment, ResultPtr,
+ ErrorPtrTmp)) {
+ const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
+ NextStart = resync_utf8(ErrorPtr, Fragment.end());
+ Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
+ ErrorPtr, NextStart);
+ NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
+ }
+
+ ResultPtr = SavedResultPtr;
}
return !NoErrorOnBadEncoding;
}
Modified: cfe/trunk/test/Misc/wrong-encoding.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Misc/wrong-encoding.c?rev=166900&r1=166899&r2=166900&view=diff
==============================================================================
--- cfe/trunk/test/Misc/wrong-encoding.c (original)
+++ cfe/trunk/test/Misc/wrong-encoding.c Sun Oct 28 13:24:46 2012
@@ -1,16 +1,33 @@
-// RUN: %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck -strict-whitespace %s
+// RUN: %clang_cc1 -fsyntax-only -Wno-unused-value %s 2>&1 | FileCheck -strict-whitespace %s
void foo() {
"§Ã"; // ø
// CHECK: {{^ "<A7><C3>"; // <F8>}}
-// CHECK: {{^ \^~~~}}
+// CHECK: {{^ \^~~~~~~}}
/* þ« */ const char *d = "¥";
// CHECK: {{^ /\* <FE><AB> \*/ const char \*d = "<A5>";}}
// CHECK: {{^ \^~~~}}
-// CHECK: {{^ "<A7><C3>"; // <F8>}}
-// CHECK: {{^ \^~~~~~~~~~}}
+ "xxé¿¿¿d";
+// CHECK: {{^ "xx<U\+9FFF><BF>d";}}
+// CHECK: {{^ \^~~~}}
+
+ "xxé¿bcd";
+// CHECK: {{^ "xx<E9><BF>bcd";}}
+// CHECK: {{^ \^~~~~~~~}}
+
+ "xxéabcd";
+// CHECK: {{^ "xx<E9>abcd";}}
+// CHECK: {{^ \^~~~}}
+
+ "xxé¿é¿d";
+// CHECK: {{^ "xx<E9><BF><E9><BF>d";}}
+// CHECK: {{^ \^~~~~~~~~~~~~~~}}
+
+ "xxé¿xxxxxxxxxxxxxxxxxxxxxé¿xx";
+// CHECK: {{^ "xx<E9><BF>xxxxxxxxxxxxxxxxxxxxx<E9><BF>xx";}}
+// CHECK: {{^ \^~~~~~~~ ~~~~~~~~}}
}
More information about the cfe-commits
mailing list