[cfe-commits] r148392 - in /cfe/trunk/test: CodeGen/char-literal.c CodeGen/string-literal-short-wstring.c Lexer/char-literal-encoding-error.c Lexer/char-literal.cpp Lexer/constants.c Lexer/utf8-char-literal.cpp Lexer/wchar.c

Wed Jan 18 08:35:34 PST 2012

On Jan 18, 2012, at 10:37 AM, Nico Weber <thakis at chromium.org> wrote:

> On Wed, Jan 18, 2012 at 4:27 AM, Seth Cantrell <seth.cantrell at gmail.com> wrote:
>> Author: socantre
>> Date: Wed Jan 18 06:27:10 2012
>> New Revision: 148392
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=148392&view=rev
>> Log:
>> Add and update tests for character literals
>>
>> Added:
>>    cfe/trunk/test/Lexer/char-literal-encoding-error.c
>>    cfe/trunk/test/Lexer/char-literal.cpp
>> Modified:
>>    cfe/trunk/test/CodeGen/char-literal.c
>>    cfe/trunk/test/CodeGen/string-literal-short-wstring.c
>>    cfe/trunk/test/Lexer/constants.c
>>    cfe/trunk/test/Lexer/utf8-char-literal.cpp
>>    cfe/trunk/test/Lexer/wchar.c
>>
>> Modified: cfe/trunk/test/CodeGen/char-literal.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/char-literal.c?rev=148392&r1=148391&r2=148392&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/char-literal.c (original)
>> +++ cfe/trunk/test/CodeGen/char-literal.c Wed Jan 18 06:27:10 2012
>> @@ -9,11 +9,26 @@
>>   // CHECK-CPP0X: store i8 97
>>   char a = 'a';
>>
>> -  // Should pick second character.
>> +  // Should truncate value (equal to last character).
>>   // CHECK-C: store i8 98
>>   // CHECK-CPP0X: store i8 98
>>   char b = 'ab';
>>
>> +  // Should get concatonated characters
>
> typo 'concatonated'

Thanks. I've thought of some tests I should add. I'll fix this typo
when I add them.

>
>> +  // CHECK-C: store i32 24930
>> +  // CHECK-CPP0X: store i32 24930
>> +  int b1 = 'ab';
>> +
>> +  // Should get concatonated characters
>> +  // CHECK-C: store i32 808464432
>> +  // CHECK-CPP0X: store i32 808464432
>> +  int b2 = '0000';
>> +
>> +  // Should get truncated value (last four characters concatonated)
>> +  // CHECK-C: store i32 1919512167
>> +  // CHECK-CPP0X: store i32 1919512167
>> +  int b3 = 'somesillylongstring';
>> +
>>   // CHECK-C: store i32 97
>>   // CHECK-CPP0X: store i32 97
>>   wchar_t wa = L'a';
>> @@ -27,26 +42,11 @@
>>   // CHECK-CPP0X: store i16 97
>>   char16_t ua = u'a';
>>
>> -  // Should pick second character.
>> -  // CHECK-CPP0X: store i16 98
>> -  char16_t ub = u'ab';
>> -
>>   // CHECK-CPP0X: store i32 97
>>   char32_t Ua = U'a';
>>
>> -  // Should pick second character.
>> -  // CHECK-CPP0X: store i32 98
>> -  char32_t Ub = U'ab';
>>  #endif
>>
>> -  // Should pick last character and store its lowest byte.
>> -  // This does not match gcc, which takes the last character, converts it to
>> -  // utf8, and then picks the second-lowest byte of that (they probably store
>> -  // the utf8 in uint16_ts internally and take the lower byte of that).
>> -  // CHECK-C: store i8 48
>> -  // CHECK-CPP0X: store i8 48
>> -  char c = '\u1120\u0220\U00102030';
>> -
>
> You're removing these codegen tests but aren't replacing them with
> anything that tests this input. Is this intentional?

Yes, this input is no longer valid. Unicode character literals
prohibit multiple characters. There are tests for this error in
Lexer/char-literal.cpp.

>
>>   // CHECK-C: store i32 61451
>>   // CHECK-CPP0X: store i32 61451
>>   wchar_t wc = L'\uF00B';
>> @@ -65,13 +65,6 @@
>>   wchar_t wd = L'\U0010F00B';
>>
>>  #if __cplusplus >= 201103L
>> -  // Should take lower word of the 4byte UNC sequence. This does not match
>> -  // gcc. I don't understand what gcc does (it looks like it converts to utf16,
>> -  // then takes the second (!) utf16 word, swaps the lower two nibbles, and
>> -  // stores that?).
>> -  // CHECK-CPP0X: store i16 -4085
>> -  char16_t ud = u'\U0010F00B';  // has utf16 encoding dbc8 dcb0
>> -
>>   // CHECK-CPP0X: store i32 1110027
>>   char32_t Ud = U'\U0010F00B';
>>  #endif
>> @@ -80,14 +73,4 @@
>>   // CHECK-C: store i32 1110027
>>   // CHECK-CPP0X: store i32 1110027
>>   wchar_t we = L'\u1234\U0010F00B';
>> -
>> -#if __cplusplus >= 201103L
>> -  // Should pick second character.
>> -  // CHECK-CPP0X: store i16 -4085
>> -  char16_t ue = u'\u1234\U0010F00B';
>> -
>> -  // Should pick second character.
>> -  // CHECK-CPP0X: store i32 1110027
>> -  char32_t Ue = U'\u1234\U0010F00B';
>> -#endif
>>  }
>>
>> Modified: cfe/trunk/test/CodeGen/string-literal-short-wstring.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/string-literal-short-wstring.c?rev=148392&r1=148391&r2=148392&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/string-literal-short-wstring.c (original)
>> +++ cfe/trunk/test/CodeGen/string-literal-short-wstring.c Wed Jan 18 06:27:10 2012
>> @@ -29,15 +29,4 @@
>>   // -4085 == 0xf00b
>>   // CHECK: store i16 -4085
>>   wchar_t wc = L'\uF00B';
>> -
>> -  // Should take lower word of the 4byte UNC sequence. This does not match
>> -  // gcc. I don't understand what gcc does (it looks like it converts to utf16,
>> -  // then takes the second (!) utf16 word, swaps the lower two nibbles, and
>> -  // stores that?).
>> -  // CHECK: store i16 -4085
>> -  wchar_t wd = L'\U0010F00B';  // has utf16 encoding dbc8 dcb0
>> -
>> -  // Should pick second character. (gcc: -9205)
>> -  // CHECK: store i16 -4085
>> -  wchar_t we = L'\u1234\U0010F00B';
>>  }
>>
>> Added: cfe/trunk/test/Lexer/char-literal-encoding-error.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/char-literal-encoding-error.c?rev=148392&view=auto
>> ==============================================================================
>> --- cfe/trunk/test/Lexer/char-literal-encoding-error.c (added)
>> +++ cfe/trunk/test/Lexer/char-literal-encoding-error.c Wed Jan 18 06:27:10 2012
>> @@ -0,0 +1,10 @@
>> +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -x c++ %s
>> +
>> +// This file is encoded using ISO-8859-1
>> +
>> +int main() {
>> +  'é'; // expected-error {{illegal sequence in character literal}}
>> +  u'é'; // expected-error {{illegal sequence in character literal}}
>> +  U'é'; // expected-error {{illegal sequence in character literal}}
>> +  L'é'; // expected-error {{illegal sequence in character literal}}
>> +}
>>
>> Added: cfe/trunk/test/Lexer/char-literal.cpp
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/char-literal.cpp?rev=148392&view=auto
>> ==============================================================================
>> --- cfe/trunk/test/Lexer/char-literal.cpp (added)
>> +++ cfe/trunk/test/Lexer/char-literal.cpp Wed Jan 18 06:27:10 2012
>> @@ -0,0 +1,24 @@
>> +// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -Wfour-char-constants -fsyntax-only -verify %s
>> +
>> +int a = 'ab'; // expected-warning {{multi-character character constant}}
>> +int b = '\xFF\xFF'; // expected-warning {{multi-character character constant}}
>> +int c = 'APPS'; // expected-warning {{multi-character character constant}}
>> +
>> +char d = '⌘'; // expected-error {{character too large for enclosing character literal type}}
>> +char e = '\u2318'; // expected-error {{character too large for enclosing character literal type}}
>> +
>> +auto f = '\xE2\x8C\x98'; // expected-warning {{multi-character character constant}}
>> +
>> +char16_t g = u'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
>> +char16_t h = u'\U0010FFFD'; // expected-error {{character too large for enclosing character literal type}}
>> +
>> +wchar_t i = L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
>> +wchar_t j = L'\U0010FFFD';
>> +
>> +char32_t k = U'\U0010FFFD';
>> +
>> +char l = 'Ø'; // expected-error {{character too large for enclosing character literal type}}
>> +char m = '👿'; // expected-error {{character too large for enclosing character literal type}}
>> +
>> +char32_t n = U'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
>> +char16_t o = '👽'; // expected-error {{character too large for enclosing character literal type}}
>>
>> Modified: cfe/trunk/test/Lexer/constants.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/constants.c?rev=148392&r1=148391&r2=148392&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/Lexer/constants.c (original)
>> +++ cfe/trunk/test/Lexer/constants.c Wed Jan 18 06:27:10 2012
>> @@ -66,4 +66,4 @@
>>  // PR7888
>>  double g = 1e100000000; // expected-warning {{too large}}
>>
>> -char h = '\u1234'; // expected-warning {{character unicode escape sequence too long for its type}}
>> +char h = '\u1234'; // expected-error {{character too large for enclosing character literal type}}
>>
>> Modified: cfe/trunk/test/Lexer/utf8-char-literal.cpp
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/utf8-char-literal.cpp?rev=148392&r1=148391&r2=148392&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/Lexer/utf8-char-literal.cpp (original)
>> +++ cfe/trunk/test/Lexer/utf8-char-literal.cpp Wed Jan 18 06:27:10 2012
>> @@ -1,4 +1,5 @@
>>  // RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only -verify %s
>>
>> -int array0[u'ñ' == u'\xf1'? 1 : -1];
>> -int array1['ñ' !=  u'\xf1'? 1 : -1];
>> +int array0[u'ñ' == u'\xf1'? 1 : -1];
>> +int array1['\xF1' !=  u'\xf1'? 1 : -1];
>> +int array1['ñ' !=  u'\xf1'? 1 : -1]; // expected-error {{character too large for enclosing character literal type}}
>>
>> Modified: cfe/trunk/test/Lexer/wchar.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/wchar.c?rev=148392&r1=148391&r2=148392&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/Lexer/wchar.c (original)
>> +++ cfe/trunk/test/Lexer/wchar.c Wed Jan 18 06:27:10 2012
>> @@ -1,9 +1,9 @@
>>  // RUN: %clang_cc1 -fsyntax-only -fshort-wchar -verify %s
>>
>>  void f() {
>> -  (void)L"\U00010000";  // expected-warning {{character unicode escape sequence too long for its type}}
>> +  (void)L"\U00010000"; // unicode escape produces UTF-16 sequence, so no warning
>>
>> -  (void)L'\U00010000';  // expected-warning {{character unicode escape sequence too long for its type}}
>> +  (void)L'\U00010000'; // expected-error {{character too large for enclosing character literal type}}
>>
>>   (void)L'ab';  // expected-warning {{extraneous characters in character constant ignored}}
>>
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits