[cfe-commits] r152344 - in /cfe/trunk: include/clang/Basic/ConvertUTF.h lib/Basic/ConvertUTF.c lib/Lex/LiteralSupport.cpp test/Lexer/cxx0x_raw_string_delim_length.cpp test/Lexer/string-literal-encoding.c
Eli Friedman
eli.friedman at gmail.com
Thu Mar 8 14:29:29 PST 2012
On Thu, Mar 8, 2012 at 1:59 PM, Richard Smith
<richard-llvm at metafoo.co.uk> wrote:
> Author: rsmith
> Date: Thu Mar 8 15:59:28 2012
> New Revision: 152344
>
> URL: http://llvm.org/viewvc/llvm-project?rev=152344&view=rev
> Log:
> When checking the encoding of an 8-bit string literal, don't just check the
> first codepoint! Also, don't reject empty raw string literals for spurious
> "encoding" issues. Also, don't rely on undefined behavior in ConvertUTF.c.
>
> Modified:
> cfe/trunk/include/clang/Basic/ConvertUTF.h
> cfe/trunk/lib/Basic/ConvertUTF.c
> cfe/trunk/lib/Lex/LiteralSupport.cpp
> cfe/trunk/test/Lexer/cxx0x_raw_string_delim_length.cpp
> cfe/trunk/test/Lexer/string-literal-encoding.c
>
> Modified: cfe/trunk/include/clang/Basic/ConvertUTF.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/ConvertUTF.h?rev=152344&r1=152343&r2=152344&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Basic/ConvertUTF.h (original)
> +++ cfe/trunk/include/clang/Basic/ConvertUTF.h Thu Mar 8 15:59:28 2012
> @@ -151,9 +151,11 @@
> ConversionResult ConvertUTF32toUTF16 (
> const UTF32** sourceStart, const UTF32* sourceEnd,
> UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
> -#endif
>
> Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
> +#endif
> +
> +Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd);
>
> #ifdef __cplusplus
> }
>
> Modified: cfe/trunk/lib/Basic/ConvertUTF.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/ConvertUTF.c?rev=152344&r1=152343&r2=152344&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Basic/ConvertUTF.c (original)
> +++ cfe/trunk/lib/Basic/ConvertUTF.c Thu Mar 8 15:59:28 2012
> @@ -387,7 +387,7 @@
> */
> Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
> int length = trailingBytesForUTF8[*source]+1;
> - if (source+length > sourceEnd) {
> + if (length > sourceEnd - source) {
> return false;
> }
> return isLegalUTF8(source, length);
> @@ -395,6 +395,22 @@
>
> /* --------------------------------------------------------------------- */
>
> +/*
> + * Exported function to return whether a UTF-8 string is legal or not.
> + * This is not used here; it's just exported.
> + */
> +Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd) {
> + while (source != sourceEnd) {
> + int length = trailingBytesForUTF8[*source] + 1;
> + if (length > sourceEnd - source || !isLegalUTF8(source, length))
> + return false;
> + source += length;
> + }
> + return true;
> +}
> +
> +/* --------------------------------------------------------------------- */
> +
> ConversionResult ConvertUTF8toUTF16 (
> const UTF8** sourceStart, const UTF8* sourceEnd,
> UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
> @@ -404,7 +420,7 @@
> while (source < sourceEnd) {
> UTF32 ch = 0;
> unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
> - if (source + extraBytesToRead >= sourceEnd) {
> + if (extraBytesToRead >= sourceEnd - source) {
> result = sourceExhausted; break;
> }
> /* Do this check whether lenient or strict */
> @@ -477,7 +493,7 @@
> while (source < sourceEnd) {
> UTF32 ch = 0;
> unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
> - if (source + extraBytesToRead >= sourceEnd) {
> + if (extraBytesToRead >= sourceEnd - source) {
> result = sourceExhausted; break;
> }
> /* Do this check whether lenient or strict */
>
> Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=152344&r1=152343&r2=152344&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
> +++ cfe/trunk/lib/Lex/LiteralSupport.cpp Thu Mar 8 15:59:28 2012
> @@ -333,7 +333,7 @@
> /// decimal-constant integer-suffix
> /// octal-constant integer-suffix
> /// hexadecimal-constant integer-suffix
> -/// user-defiend-integer-literal: [C++11 lex.ext]
> +/// user-defined-integer-literal: [C++11 lex.ext]
> /// decimal-literal ud-suffix
> /// octal-literal ud-suffix
> /// hexadecimal-literal ud-suffix
> @@ -1167,17 +1167,14 @@
> ++ThisTokBuf;
> ++ThisTokBuf; // skip '('
>
> - // remove same number of characters from the end
> - if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
> - ThisTokEnd -= (ThisTokBuf - Prefix);
> + // Remove same number of characters from the end
> + ThisTokEnd -= ThisTokBuf - Prefix;
> + assert(ThisTokEnd >= ThisTokBuf && "malformed raw string literal");
>
> // Copy the string over
> - if (CopyStringFragment(StringRef(ThisTokBuf,ThisTokEnd-ThisTokBuf)))
> - {
> + if (CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf)))
> if (DiagnoseBadString(StringToks[i]))
> hadError = true;
> - }
> -
> } else {
> assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
> ++ThisTokBuf; // skip "
> @@ -1204,11 +1201,9 @@
> } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
>
> // Copy the character span over.
> - if (CopyStringFragment(StringRef(InStart,ThisTokBuf-InStart)))
> - {
> + if (CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart)))
> if (DiagnoseBadString(StringToks[i]))
> hadError = true;
> - }
> continue;
> }
> // Is this a Universal Character Name escape?
> @@ -1292,8 +1287,8 @@
> ConversionResult result = conversionOK;
> // Copy the character span over.
> if (CharByteWidth == 1) {
> - if (!isLegalUTF8Sequence(reinterpret_cast<const UTF8*>(Fragment.begin()),
> - reinterpret_cast<const UTF8*>(Fragment.end())))
> + if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(Fragment.begin()),
> + reinterpret_cast<const UTF8*>(Fragment.end())))
Ah, I think that one is my fault... thanks for spotting it.
-Eli
More information about the cfe-commits mailing list