[cfe-commits] r166900 - in /cfe/trunk: lib/Lex/LiteralSupport.cpp test/Misc/wrong-encoding.c
David Blaikie
dblaikie at gmail.com
Tue Oct 30 16:23:34 PDT 2012
On Sun, Oct 28, 2012 at 11:24 AM, Seth Cantrell <seth.cantrell at gmail.com> wrote:
> Author: socantre
> Date: Sun Oct 28 13:24:46 2012
> New Revision: 166900
>
> URL: http://llvm.org/viewvc/llvm-project?rev=166900&view=rev
> Log:
> improve highlighting of invalid string encodings
>
> limit highlight to exactly the bad encoding, and highlight every
> bad encoding in a string.
>
> Modified:
> cfe/trunk/lib/Lex/LiteralSupport.cpp
> cfe/trunk/test/Misc/wrong-encoding.c
>
> Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=166900&r1=166899&r2=166900&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
> +++ cfe/trunk/lib/Lex/LiteralSupport.cpp Sun Oct 28 13:24:46 2012
> @@ -49,6 +49,20 @@
> }
> }
>
> +static CharSourceRange MakeCharSourceRange(const LangOptions &Features,
> + FullSourceLoc TokLoc,
> + const char *TokBegin,
> + const char *TokRangeBegin,
> + const char *TokRangeEnd) {
> + SourceLocation Begin =
> + Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
> + TokLoc.getManager(), Features);
> + SourceLocation End =
> + Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
> + TokLoc.getManager(), Features);
> + return CharSourceRange::getCharRange(Begin, End);
> +}
> +
> /// \brief Produce a diagnostic highlighting some portion of a literal.
> ///
> /// Emits the diagnostic \p DiagID, highlighting the range of characters from
> @@ -61,11 +75,8 @@
> SourceLocation Begin =
> Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
> TokLoc.getManager(), Features);
> - SourceLocation End =
> - Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
> - TokLoc.getManager(), Features);
> - return Diags->Report(Begin, DiagID)
> - << CharSourceRange::getCharRange(Begin, End);
> + return Diags->Report(Begin, DiagID) <<
> + MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
> }
>
> /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
> @@ -1372,6 +1383,15 @@
> }
> }
>
> +static const char *resync_utf8(const char *err, const char *end) {
> + if (err==end)
> + return end;
> + end = err + std::min<unsigned>(getNumBytesForUTF8(*err), end-err);
> + while (++err!=end && (*err&0xC0)==0x80)
> + ;
> + return err;
> +}
> +
> /// \brief This function copies from Fragment, which is a sequence of bytes
> /// within Tok's contents (which begin at TokBegin) into ResultPtr.
> /// Performs widening for multi-byte characters.
> @@ -1381,7 +1401,6 @@
> const UTF8 *ErrorPtrTmp;
> if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
> return false;
> - const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
>
> // If we see bad encoding for unprefixed string literals, warn and
> // simply copy the byte values, for compatibility with gcc and older
> @@ -1391,12 +1410,31 @@
> memcpy(ResultPtr, Fragment.data(), Fragment.size());
> ResultPtr += Fragment.size();
> }
> +
> if (Diags) {
> - Diag(Diags, Features, FullSourceLoc(Tok.getLocation(), SM), TokBegin,
> - ErrorPtr, ErrorPtr + std::min<unsigned>(getNumBytesForUTF8(*ErrorPtr),
> - Fragment.end() - ErrorPtr),
> - NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
> - : diag::err_bad_string_encoding);
> + const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
> +
> + FullSourceLoc SourceLoc(Tok.getLocation(), SM);
> + const DiagnosticBuilder &Builder =
> + Diag(Diags, Features, SourceLoc, TokBegin,
> + ErrorPtr, resync_utf8(ErrorPtr, Fragment.end()),
> + NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
> + : diag::err_bad_string_encoding);
> +
> + char *SavedResultPtr = ResultPtr;
> + const char *NextStart = resync_utf8(ErrorPtr, Fragment.end());
> + StringRef NextFragment(NextStart, Fragment.end()-NextStart);
> +
> + while (!ConvertUTF8toWide(CharByteWidth, NextFragment, ResultPtr,
> + ErrorPtrTmp)) {
> + const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
> + NextStart = resync_utf8(ErrorPtr, Fragment.end());
> + Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
> + ErrorPtr, NextStart);
This fails when the number of ranges added exceeds the bounds of the
DiagnosticBuilder's SourceRange storage. I fixed this (by limiting the
number of ranges we add to the diagnostic) in r167059. Feel free to try
other solutions if that one isn't suitable in some way.
> + NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
> + }
> +
> + ResultPtr = SavedResultPtr;
> }
> return !NoErrorOnBadEncoding;
> }
>
> Modified: cfe/trunk/test/Misc/wrong-encoding.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Misc/wrong-encoding.c?rev=166900&r1=166899&r2=166900&view=diff
> ==============================================================================
> --- cfe/trunk/test/Misc/wrong-encoding.c (original)
> +++ cfe/trunk/test/Misc/wrong-encoding.c Sun Oct 28 13:24:46 2012
> @@ -1,16 +1,33 @@
> -// RUN: %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck -strict-whitespace %s
> +// RUN: %clang_cc1 -fsyntax-only -Wno-unused-value %s 2>&1 | FileCheck -strict-whitespace %s
>
> void foo() {
>
> "§Ã"; // ø
> // CHECK: {{^ "<A7><C3>"; // <F8>}}
> -// CHECK: {{^ \^~~~}}
> +// CHECK: {{^ \^~~~~~~}}
>
> /* þ« */ const char *d = "¥";
>
> // CHECK: {{^ /\* <FE><AB> \*/ const char \*d = "<A5>";}}
> // CHECK: {{^ \^~~~}}
>
> -// CHECK: {{^ "<A7><C3>"; // <F8>}}
> -// CHECK: {{^ \^~~~~~~~~~}}
> + "xxé¿¿¿d";
> +// CHECK: {{^ "xx<U\+9FFF><BF>d";}}
> +// CHECK: {{^ \^~~~}}
> +
> + "xxé¿bcd";
> +// CHECK: {{^ "xx<E9><BF>bcd";}}
> +// CHECK: {{^ \^~~~~~~~}}
> +
> + "xxéabcd";
> +// CHECK: {{^ "xx<E9>abcd";}}
> +// CHECK: {{^ \^~~~}}
> +
> + "xxé¿é¿d";
> +// CHECK: {{^ "xx<E9><BF><E9><BF>d";}}
> +// CHECK: {{^ \^~~~~~~~~~~~~~~}}
> +
> + "xxé¿xxxxxxxxxxxxxxxxxxxxxé¿xx";
> +// CHECK: {{^ "xx<E9><BF>xxxxxxxxxxxxxxxxxxxxx<E9><BF>xx";}}
> +// CHECK: {{^ \^~~~~~~~ ~~~~~~~~}}
> }
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
More information about the cfe-commits
mailing list