[cfe-commits] r158390 - in /cfe/trunk: lib/AST/Expr.cpp lib/Lex/LiteralSupport.cpp lib/Sema/SemaChecking.cpp test/SemaCXX/format-strings-0x.cpp
Sean Silva
silvas at purdue.edu
Tue Jun 12 23:41:52 PDT 2012
+ assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) &&
+ "Only narrow string literals are currently supported");
If a non-narrow string literal is encountered with asserts off, will this
just continue on and silently corrupt the rest of the compilation? Or will
parsing the non-narrow string literal gracefully fail somewhere earlier in
the pipeline?
On Tue, Jun 12, 2012 at 10:37 PM, Richard Smith
<richard-llvm at metafoo.co.uk> wrote:
> Author: rsmith
> Date: Wed Jun 13 00:37:23 2012
> New Revision: 158390
>
> URL: http://llvm.org/viewvc/llvm-project?rev=158390&view=rev
> Log:
> PR13099: Teach -Wformat about raw string literals, UTF-8 strings and
> Unicode escape sequences.
>
> Modified:
> cfe/trunk/lib/AST/Expr.cpp
> cfe/trunk/lib/Lex/LiteralSupport.cpp
> cfe/trunk/lib/Sema/SemaChecking.cpp
> cfe/trunk/test/SemaCXX/format-strings-0x.cpp
>
> Modified: cfe/trunk/lib/AST/Expr.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Expr.cpp?rev=158390&r1=158389&r2=158390&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/AST/Expr.cpp (original)
> +++ cfe/trunk/lib/AST/Expr.cpp Wed Jun 13 00:37:23 2012
> @@ -679,7 +679,8 @@
> SourceLocation StringLiteral::
> getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
> const LangOptions &Features, const TargetInfo &Target)
> const {
> - assert(Kind == StringLiteral::Ascii && "This only works for ASCII
> strings");
> + assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) &&
> + "Only narrow string literals are currently supported");
>
> // Loop over all of the tokens in this string until we find the one that
> // contains the byte we're looking for.
>
> Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=158390&r1=158389&r2=158390&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
> +++ cfe/trunk/lib/Lex/LiteralSupport.cpp Wed Jun 13 00:37:23 2012
> @@ -250,6 +250,39 @@
> return true;
> }
>
> +/// MeasureUCNEscape - Determine the number of bytes within the resulting
> string
> +/// which this UCN will occupy.
> +static int MeasureUCNEscape(const char *ThisTokBegin, const char
> *&ThisTokBuf,
> + const char *ThisTokEnd, unsigned
> CharByteWidth,
> + const LangOptions &Features, bool &HadError) {
> + // UTF-32: 4 bytes per escape.
> + if (CharByteWidth == 4)
> + return 4;
> +
> + uint32_t UcnVal = 0;
> + unsigned short UcnLen = 0;
> + FullSourceLoc Loc;
> +
> + if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
> + UcnLen, Loc, 0, Features, true)) {
> + HadError = true;
> + return 0;
> + }
> +
> + // UTF-16: 2 bytes for BMP, 4 bytes otherwise.
> + if (CharByteWidth == 2)
> + return UcnVal <= 0xFFFF ? 2 : 4;
> +
> + // UTF-8.
> + if (UcnVal < 0x80)
> + return 1;
> + if (UcnVal < 0x800)
> + return 2;
> + if (UcnVal < 0x10000)
> + return 3;
> + return 4;
> +}
> +
> /// EncodeUCNEscape - Read the Universal Character Name, check
> constraints and
> /// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
> /// StringLiteralParser. When we decide to implement UCN's for
> identifiers,
> @@ -265,7 +298,7 @@
> unsigned short UcnLen = 0;
> if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
> UcnLen,
> Loc, Diags, Features, true)) {
> - HadError = 1;
> + HadError = true;
> return;
> }
>
> @@ -1369,14 +1402,31 @@
> if (StringInvalid)
> return 0;
>
> + const char *SpellingStart = SpellingPtr;
> + const char *SpellingEnd = SpellingPtr+TokLen;
> +
> + // Handle UTF-8 strings just like narrow strings.
> + if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
> + SpellingPtr += 2;
> +
> assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
> SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings
> yet");
>
> + // For raw string literals, this is easy.
> + if (SpellingPtr[0] == 'R') {
> + assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");
> + // Skip 'R"'.
> + SpellingPtr += 2;
> + while (*SpellingPtr != '(') {
> + ++SpellingPtr;
> + assert(SpellingPtr < SpellingEnd && "Missing ( for raw string
> literal");
> + }
> + // Skip '('.
> + ++SpellingPtr;
> + return SpellingPtr - SpellingStart + ByteNo;
> + }
>
> - const char *SpellingStart = SpellingPtr;
> - const char *SpellingEnd = SpellingPtr+TokLen;
> -
> - // Skip over the leading quote.
> + // Skip over the leading quote
> assert(SpellingPtr[0] == '"' && "Should be a string literal!");
> ++SpellingPtr;
>
> @@ -1393,11 +1443,23 @@
>
> // Otherwise, this is an escape character. Advance over it.
> bool HadError = false;
> - ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
> - FullSourceLoc(Tok.getLocation(), SM),
> - CharByteWidth*8, Diags);
> + if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U') {
> + const char *EscapePtr = SpellingPtr;
> + unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr,
> SpellingEnd,
> + 1, Features, HadError);
> + if (Len > ByteNo) {
> + // ByteNo is somewhere within the escape sequence.
> + SpellingPtr = EscapePtr;
> + break;
> + }
> + ByteNo -= Len;
> + } else {
> + ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
> + FullSourceLoc(Tok.getLocation(), SM),
> + CharByteWidth*8, Diags);
> + --ByteNo;
> + }
> assert(!HadError && "This method isn't valid on erroneous strings");
> - --ByteNo;
> }
>
> return SpellingPtr-SpellingStart;
>
> Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=158390&r1=158389&r2=158390&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaChecking.cpp Wed Jun 13 00:37:23 2012
> @@ -2633,7 +2633,7 @@
> bool inFunctionCall) {
>
> // CHECK: is the format string a wide literal?
> - if (!FExpr->isAscii()) {
> + if (!FExpr->isAscii() && !FExpr->isUTF8()) {
> CheckFormatHandler::EmitFormatDiagnostic(
> *this, inFunctionCall, Args[format_idx],
> PDiag(diag::warn_format_string_is_wide_literal),
> FExpr->getLocStart(),
>
> Modified: cfe/trunk/test/SemaCXX/format-strings-0x.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/format-strings-0x.cpp?rev=158390&r1=158389&r2=158390&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/SemaCXX/format-strings-0x.cpp (original)
> +++ cfe/trunk/test/SemaCXX/format-strings-0x.cpp Wed Jun 13 00:37:23 2012
> @@ -12,4 +12,16 @@
> scanf("%afoobar", fp);
> printf(nullptr);
> printf(*sp); // expected-warning {{not a string literal}}
> +
> + // PR13099
> + printf(
> + R"foobar(%)foobar"
> + R"bazquux(d)bazquux" // expected-warning {{more '%' conversions than
> data arguments}}
> + R"xyzzy()xyzzy");
> +
> + printf(u8"this is %d test", 0); // ok
> + printf(u8R"foo(
> + \u1234\U0010fffe
> + %d)foo" // expected-warning {{more '%' conversions than data
> arguments}}
> + );
> }
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20120612/cac25230/attachment.html>
More information about the cfe-commits
mailing list