[clang] bdeda95 - Make wide multi-character character literals ill-formed

Aaron Ballman via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 20 08:13:01 PDT 2021


Author: Corentin Jabot
Date: 2021-08-20T11:10:53-04:00
New Revision: bdeda959abd74c88a6cfc34b10c1b665cb45cb8d

URL: https://github.com/llvm/llvm-project/commit/bdeda959abd74c88a6cfc34b10c1b665cb45cb8d
DIFF: https://github.com/llvm/llvm-project/commit/bdeda959abd74c88a6cfc34b10c1b665cb45cb8d.diff

LOG: Make wide multi-character character literals ill-formed

This implements P2362, which has not yet been approved by the
C++ committee, but because wide multi-character literals are
implementation-defined, clang might not have to wait for WG21.

This change is also being applied in C mode as the behavior is
implementation-defined in C as well and there's no benefit to
having different rules between the languages.

The other part of P2362, making non-representable character
literals ill-formed, is already implemented by clang.

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/DiagnosticLexKinds.td
    clang/lib/Lex/LiteralSupport.cpp
    clang/test/CodeGen/char-literal.c
    clang/test/CodeGen/string-literal-short-wstring.c
    clang/test/Lexer/char-literal.cpp
    clang/test/Lexer/wchar.c
    clang/test/Misc/warning-flags.c
    clang/test/Preprocessor/Weverything_pragma.c

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7604df7482c7..f728f5b4fcfc 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -100,7 +100,9 @@ Windows Support
 C Language Changes in Clang
 ---------------------------
 
-- ...
+- Wide multi-characters literals such as ``L'ab'`` that would previously be interpreted as ``L'b'``
+  are now ill-formed in all language modes. The motivation for this change is outlined in
+  `P2362 <wg21.link/P2362>`_.
 
 C++ Language Changes in Clang
 -----------------------------

diff  --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 45a5b62af461..c19adf104db1 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -183,12 +183,10 @@ def warn_c2x_compat_digit_separator : Warning<
   InGroup<CPre2xCompat>, DefaultIgnore;
 def err_digit_separator_not_between_digits : Error<
   "digit separator cannot appear at %select{start|end}0 of digit sequence">;
-def warn_extraneous_char_constant : Warning<
-  "extraneous characters in character constant ignored">;
 def warn_char_constant_too_large : Warning<
   "character constant too long for its type">;
-def err_multichar_utf_character_literal : Error<
-  "Unicode character literals may not contain multiple characters">;
+def err_multichar_character_literal : Error<
+  "%select{wide|Unicode}0 character literals may not contain multiple characters">;
 def err_exponent_has_no_digits : Error<"exponent has no digits">;
 def err_hex_constant_requires : Error<
   "hexadecimal floating %select{constant|literal}0 requires "

diff  --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 85d826ce9c6f..f012fb72580e 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -1390,14 +1390,14 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
   unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
 
   if (NumCharsSoFar > 1) {
-    if (isWide())
-      PP.Diag(Loc, diag::warn_extraneous_char_constant);
-    else if (isAscii() && NumCharsSoFar == 4)
+    if (isAscii() && NumCharsSoFar == 4)
       PP.Diag(Loc, diag::warn_four_char_character_literal);
     else if (isAscii())
       PP.Diag(Loc, diag::warn_multichar_character_literal);
-    else
-      PP.Diag(Loc, diag::err_multichar_utf_character_literal);
+    else {
+      PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
+      HadError = true;
+    }
     IsMultiChar = true;
   } else {
     IsMultiChar = false;

diff  --git a/clang/test/CodeGen/char-literal.c b/clang/test/CodeGen/char-literal.c
index 6fdf8b7c02b1..c7a2a7bee471 100644
--- a/clang/test/CodeGen/char-literal.c
+++ b/clang/test/CodeGen/char-literal.c
@@ -1,5 +1,4 @@
 // RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s
-// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s
 // RUN: %clang_cc1 -x c++ -std=c++11 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-CPP0X %s
 
 #include <stddef.h>
@@ -33,11 +32,6 @@ int main() {
   // CHECK-CPP0X: store i32 97
   wchar_t wa = L'a';
 
-  // Should pick second character.
-  // CHECK-C: store i32 98
-  // CHECK-CPP0X: store i32 98
-  wchar_t wb = L'ab';
-
 #if __cplusplus >= 201103L
   // CHECK-CPP0X: store i16 97
   char16_t ua = u'a';
@@ -83,8 +77,4 @@ int main() {
   char32_t Ud = U'\U0010F00B';
 #endif
 
-  // Should pick second character.
-  // CHECK-C: store i32 1110027
-  // CHECK-CPP0X: store i32 1110027
-  wchar_t we = L'\u1234\U0010F00B';
 }

diff  --git a/clang/test/CodeGen/string-literal-short-wstring.c b/clang/test/CodeGen/string-literal-short-wstring.c
index 8894b8823e91..899a82021846 100644
--- a/clang/test/CodeGen/string-literal-short-wstring.c
+++ b/clang/test/CodeGen/string-literal-short-wstring.c
@@ -1,11 +1,14 @@
-// RUN: %clang_cc1 -x c++ -triple %itanium_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ITANIUM
-// RUN: %clang_cc1 -x c++ -triple %ms_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI
-// Runs in c++ mode so that wchar_t is available.
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ITANIUM
+// RUN: %clang_cc1 -triple %ms_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI
+
+// Run in C mode as wide multichar literals are not valid in C++
 
 // XFAIL: hexagon
 // Hexagon aligns arrays of size 8+ bytes to a 64-bit boundary, which fails
 // the first check line with "align 1".
 
+typedef __WCHAR_TYPE__ wchar_t;
+
 int main() {
   // This should convert to utf8.
   // CHECK: private unnamed_addr constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
@@ -20,8 +23,6 @@ int main() {
   // MSABI: linkonce_odr dso_local unnamed_addr constant [5 x i16] [i16 4384, i16 544, i16 -9272, i16 -9168, i16 0]
   const wchar_t *bar = L"\u1120\u0220\U00102030";
 
-
-
   // Should pick second character.
   // CHECK: store i8 98
   char c = 'ab';
@@ -29,10 +30,6 @@ int main() {
   // CHECK: store i16 97
   wchar_t wa = L'a';
 
-  // Should pick second character.
-  // CHECK: store i16 98
-  wchar_t wb = L'ab';
-
   // -4085 == 0xf00b
   // CHECK: store i16 -4085
   wchar_t wc = L'\uF00B';

diff  --git a/clang/test/Lexer/char-literal.cpp b/clang/test/Lexer/char-literal.cpp
index 1cd14a9b0116..a71500f8f010 100644
--- a/clang/test/Lexer/char-literal.cpp
+++ b/clang/test/Lexer/char-literal.cpp
@@ -21,7 +21,8 @@ auto f = '\xE2\x8C\x98'; // expected-warning {{multi-character character constan
 char16_t g = u'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
 char16_t h = u'\U0010FFFD'; // expected-error {{character too large for enclosing character literal type}}
 
-wchar_t i = L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
+wchar_t i = L'ab'; // expected-error {{wide character literals may not contain multiple characters}}
+
 wchar_t j = L'\U0010FFFD';
 
 char32_t k = U'\U0010FFFD';

diff  --git a/clang/test/Lexer/wchar.c b/clang/test/Lexer/wchar.c
index 47417382c954..4ecaee429f73 100644
--- a/clang/test/Lexer/wchar.c
+++ b/clang/test/Lexer/wchar.c
@@ -3,10 +3,8 @@
 void f() {
   (void)L"\U00010000"; // unicode escape produces UTF-16 sequence, so no warning
 
-  (void)L'\U00010000'; // expected-error {{character too large for enclosing character literal type}}
+  (void)L'ab';  // expected-error {{wide character literals may not contain multiple characters}}
 
-  (void)L'ab';  // expected-warning {{extraneous characters in character constant ignored}}
-
-  (void)L'a\u1000';  // expected-warning {{extraneous characters in character constant ignored}}
+  (void)L'a\u1000';  // expected-error {{wide character literals may not contain multiple characters}}
 }
 

diff  --git a/clang/test/Misc/warning-flags.c b/clang/test/Misc/warning-flags.c
index e4f9069b88c8..a9e0a784c5c8 100644
--- a/clang/test/Misc/warning-flags.c
+++ b/clang/test/Misc/warning-flags.c
@@ -18,7 +18,7 @@ This test serves two purposes:
 
 The list of warnings below should NEVER grow.  It should gradually shrink to 0.
 
-CHECK: Warnings without flags (68):
+CHECK: Warnings without flags (67):
 
 CHECK-NEXT:   ext_expected_semi_decl_list
 CHECK-NEXT:   ext_explicit_specialization_storage_class
@@ -50,7 +50,6 @@ CHECK-NEXT:   warn_drv_pch_not_first_include
 CHECK-NEXT:   warn_dup_category_def
 CHECK-NEXT:   warn_enum_value_overflow
 CHECK-NEXT:   warn_expected_qualified_after_typename
-CHECK-NEXT:   warn_extraneous_char_constant
 CHECK-NEXT:   warn_fe_backend_unsupported
 CHECK-NEXT:   warn_fe_cc_log_diagnostics_failure
 CHECK-NEXT:   warn_fe_cc_print_header_failure

diff  --git a/clang/test/Preprocessor/Weverything_pragma.c b/clang/test/Preprocessor/Weverything_pragma.c
index f2cf97ed4a1c..5300e7aebf46 100644
--- a/clang/test/Preprocessor/Weverything_pragma.c
+++ b/clang/test/Preprocessor/Weverything_pragma.c
@@ -10,21 +10,21 @@ void foo(void) // expected-warning {{no previous prototype for function}}
 // expected-note@-1{{declare 'static' if the function is not intended to be used outside of this translation unit}}
 {
  // A diagnostic without DefaultIgnore, and not part of a group.
- (void) L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
+ (void) 'ab'; // expected-warning {{multi-character character constant}}
 
 #pragma clang diagnostic warning "-Weverything" // Should not change anyhting.
 #define UNUSED_MACRO2 1 // expected-warning{{macro is not used}}
- (void) L'cd'; // expected-warning {{extraneous characters in character constant ignored}}
+ (void) 'cd'; // expected-warning {{multi-character character constant}}
 
 #pragma clang diagnostic ignored "-Weverything" // Ignore warnings now.
 #define UNUSED_MACRO2 1 // no warning
- (void) L'ef'; // no warning here
+ (void) 'ef'; // no warning here
 
 #pragma clang diagnostic warning "-Weverything" // Revert back to warnings.
 #define UNUSED_MACRO3 1 // expected-warning{{macro is not used}}
- (void) L'gh'; // expected-warning {{extraneous characters in character constant ignored}}
+ (void) 'gh'; // expected-warning {{multi-character character constant}}
 
 #pragma clang diagnostic error "-Weverything"  // Give errors now.
 #define UNUSED_MACRO4 1 // expected-error{{macro is not used}}
- (void) L'ij'; // expected-error {{extraneous characters in character constant ignored}}
+ (void) 'ij'; // expected-error {{multi-character character constant}}
 }


        


More information about the cfe-commits mailing list