[clang] bdeda95 - Make wide multi-character character literals ill-formed
Aaron Ballman via cfe-commits
cfe-commits at lists.llvm.org
Fri Aug 20 08:13:01 PDT 2021
Author: Corentin Jabot
Date: 2021-08-20T11:10:53-04:00
New Revision: bdeda959abd74c88a6cfc34b10c1b665cb45cb8d
URL: https://github.com/llvm/llvm-project/commit/bdeda959abd74c88a6cfc34b10c1b665cb45cb8d
DIFF: https://github.com/llvm/llvm-project/commit/bdeda959abd74c88a6cfc34b10c1b665cb45cb8d.diff
LOG: Make wide multi-character character literals ill-formed
This implements P2362, which has not yet been approved by the
C++ committee, but because wide multi-character literals are
implementation-defined, clang might not have to wait for WG21.
This change is also being applied in C mode as the behavior is
implementation-defined in C as well and there's no benefit to
having different rules between the languages.
The other part of P2362, making non-representable character
literals ill-formed, is already implemented by clang.
Added:
Modified:
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/DiagnosticLexKinds.td
clang/lib/Lex/LiteralSupport.cpp
clang/test/CodeGen/char-literal.c
clang/test/CodeGen/string-literal-short-wstring.c
clang/test/Lexer/char-literal.cpp
clang/test/Lexer/wchar.c
clang/test/Misc/warning-flags.c
clang/test/Preprocessor/Weverything_pragma.c
Removed:
################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7604df7482c7..f728f5b4fcfc 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -100,7 +100,9 @@ Windows Support
C Language Changes in Clang
---------------------------
-- ...
+- Wide multi-characters literals such as ``L'ab'`` that would previously be interpreted as ``L'b'``
+ are now ill-formed in all language modes. The motivation for this change is outlined in
+ `P2362 <wg21.link/P2362>`_.
C++ Language Changes in Clang
-----------------------------
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 45a5b62af461..c19adf104db1 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -183,12 +183,10 @@ def warn_c2x_compat_digit_separator : Warning<
InGroup<CPre2xCompat>, DefaultIgnore;
def err_digit_separator_not_between_digits : Error<
"digit separator cannot appear at %select{start|end}0 of digit sequence">;
-def warn_extraneous_char_constant : Warning<
- "extraneous characters in character constant ignored">;
def warn_char_constant_too_large : Warning<
"character constant too long for its type">;
-def err_multichar_utf_character_literal : Error<
- "Unicode character literals may not contain multiple characters">;
+def err_multichar_character_literal : Error<
+ "%select{wide|Unicode}0 character literals may not contain multiple characters">;
def err_exponent_has_no_digits : Error<"exponent has no digits">;
def err_hex_constant_requires : Error<
"hexadecimal floating %select{constant|literal}0 requires "
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 85d826ce9c6f..f012fb72580e 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -1390,14 +1390,14 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
if (NumCharsSoFar > 1) {
- if (isWide())
- PP.Diag(Loc, diag::warn_extraneous_char_constant);
- else if (isAscii() && NumCharsSoFar == 4)
+ if (isAscii() && NumCharsSoFar == 4)
PP.Diag(Loc, diag::warn_four_char_character_literal);
else if (isAscii())
PP.Diag(Loc, diag::warn_multichar_character_literal);
- else
- PP.Diag(Loc, diag::err_multichar_utf_character_literal);
+ else {
+ PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
+ HadError = true;
+ }
IsMultiChar = true;
} else {
IsMultiChar = false;
diff --git a/clang/test/CodeGen/char-literal.c b/clang/test/CodeGen/char-literal.c
index 6fdf8b7c02b1..c7a2a7bee471 100644
--- a/clang/test/CodeGen/char-literal.c
+++ b/clang/test/CodeGen/char-literal.c
@@ -1,5 +1,4 @@
// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s
-// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s
// RUN: %clang_cc1 -x c++ -std=c++11 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-CPP0X %s
#include <stddef.h>
@@ -33,11 +32,6 @@ int main() {
// CHECK-CPP0X: store i32 97
wchar_t wa = L'a';
- // Should pick second character.
- // CHECK-C: store i32 98
- // CHECK-CPP0X: store i32 98
- wchar_t wb = L'ab';
-
#if __cplusplus >= 201103L
// CHECK-CPP0X: store i16 97
char16_t ua = u'a';
@@ -83,8 +77,4 @@ int main() {
char32_t Ud = U'\U0010F00B';
#endif
- // Should pick second character.
- // CHECK-C: store i32 1110027
- // CHECK-CPP0X: store i32 1110027
- wchar_t we = L'\u1234\U0010F00B';
}
diff --git a/clang/test/CodeGen/string-literal-short-wstring.c b/clang/test/CodeGen/string-literal-short-wstring.c
index 8894b8823e91..899a82021846 100644
--- a/clang/test/CodeGen/string-literal-short-wstring.c
+++ b/clang/test/CodeGen/string-literal-short-wstring.c
@@ -1,11 +1,14 @@
-// RUN: %clang_cc1 -x c++ -triple %itanium_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ITANIUM
-// RUN: %clang_cc1 -x c++ -triple %ms_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI
-// Runs in c++ mode so that wchar_t is available.
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ITANIUM
+// RUN: %clang_cc1 -triple %ms_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI
+
+// Run in C mode as wide multichar literals are not valid in C++
// XFAIL: hexagon
// Hexagon aligns arrays of size 8+ bytes to a 64-bit boundary, which fails
// the first check line with "align 1".
+typedef __WCHAR_TYPE__ wchar_t;
+
int main() {
// This should convert to utf8.
// CHECK: private unnamed_addr constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
@@ -20,8 +23,6 @@ int main() {
// MSABI: linkonce_odr dso_local unnamed_addr constant [5 x i16] [i16 4384, i16 544, i16 -9272, i16 -9168, i16 0]
const wchar_t *bar = L"\u1120\u0220\U00102030";
-
-
// Should pick second character.
// CHECK: store i8 98
char c = 'ab';
@@ -29,10 +30,6 @@ int main() {
// CHECK: store i16 97
wchar_t wa = L'a';
- // Should pick second character.
- // CHECK: store i16 98
- wchar_t wb = L'ab';
-
// -4085 == 0xf00b
// CHECK: store i16 -4085
wchar_t wc = L'\uF00B';
diff --git a/clang/test/Lexer/char-literal.cpp b/clang/test/Lexer/char-literal.cpp
index 1cd14a9b0116..a71500f8f010 100644
--- a/clang/test/Lexer/char-literal.cpp
+++ b/clang/test/Lexer/char-literal.cpp
@@ -21,7 +21,8 @@ auto f = '\xE2\x8C\x98'; // expected-warning {{multi-character character constan
char16_t g = u'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
char16_t h = u'\U0010FFFD'; // expected-error {{character too large for enclosing character literal type}}
-wchar_t i = L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
+wchar_t i = L'ab'; // expected-error {{wide character literals may not contain multiple characters}}
+
wchar_t j = L'\U0010FFFD';
char32_t k = U'\U0010FFFD';
diff --git a/clang/test/Lexer/wchar.c b/clang/test/Lexer/wchar.c
index 47417382c954..4ecaee429f73 100644
--- a/clang/test/Lexer/wchar.c
+++ b/clang/test/Lexer/wchar.c
@@ -3,10 +3,8 @@
void f() {
(void)L"\U00010000"; // unicode escape produces UTF-16 sequence, so no warning
- (void)L'\U00010000'; // expected-error {{character too large for enclosing character literal type}}
+ (void)L'ab'; // expected-error {{wide character literals may not contain multiple characters}}
- (void)L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
-
- (void)L'a\u1000'; // expected-warning {{extraneous characters in character constant ignored}}
+ (void)L'a\u1000'; // expected-error {{wide character literals may not contain multiple characters}}
}
diff --git a/clang/test/Misc/warning-flags.c b/clang/test/Misc/warning-flags.c
index e4f9069b88c8..a9e0a784c5c8 100644
--- a/clang/test/Misc/warning-flags.c
+++ b/clang/test/Misc/warning-flags.c
@@ -18,7 +18,7 @@ This test serves two purposes:
The list of warnings below should NEVER grow. It should gradually shrink to 0.
-CHECK: Warnings without flags (68):
+CHECK: Warnings without flags (67):
CHECK-NEXT: ext_expected_semi_decl_list
CHECK-NEXT: ext_explicit_specialization_storage_class
@@ -50,7 +50,6 @@ CHECK-NEXT: warn_drv_pch_not_first_include
CHECK-NEXT: warn_dup_category_def
CHECK-NEXT: warn_enum_value_overflow
CHECK-NEXT: warn_expected_qualified_after_typename
-CHECK-NEXT: warn_extraneous_char_constant
CHECK-NEXT: warn_fe_backend_unsupported
CHECK-NEXT: warn_fe_cc_log_diagnostics_failure
CHECK-NEXT: warn_fe_cc_print_header_failure
diff --git a/clang/test/Preprocessor/Weverything_pragma.c b/clang/test/Preprocessor/Weverything_pragma.c
index f2cf97ed4a1c..5300e7aebf46 100644
--- a/clang/test/Preprocessor/Weverything_pragma.c
+++ b/clang/test/Preprocessor/Weverything_pragma.c
@@ -10,21 +10,21 @@ void foo(void) // expected-warning {{no previous prototype for function}}
// expected-note@-1{{declare 'static' if the function is not intended to be used outside of this translation unit}}
{
// A diagnostic without DefaultIgnore, and not part of a group.
- (void) L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
+ (void) 'ab'; // expected-warning {{multi-character character constant}}
#pragma clang diagnostic warning "-Weverything" // Should not change anyhting.
#define UNUSED_MACRO2 1 // expected-warning{{macro is not used}}
- (void) L'cd'; // expected-warning {{extraneous characters in character constant ignored}}
+ (void) 'cd'; // expected-warning {{multi-character character constant}}
#pragma clang diagnostic ignored "-Weverything" // Ignore warnings now.
#define UNUSED_MACRO2 1 // no warning
- (void) L'ef'; // no warning here
+ (void) 'ef'; // no warning here
#pragma clang diagnostic warning "-Weverything" // Revert back to warnings.
#define UNUSED_MACRO3 1 // expected-warning{{macro is not used}}
- (void) L'gh'; // expected-warning {{extraneous characters in character constant ignored}}
+ (void) 'gh'; // expected-warning {{multi-character character constant}}
#pragma clang diagnostic error "-Weverything" // Give errors now.
#define UNUSED_MACRO4 1 // expected-error{{macro is not used}}
- (void) L'ij'; // expected-error {{extraneous characters in character constant ignored}}
+ (void) 'ij'; // expected-error {{multi-character character constant}}
}
More information about the cfe-commits
mailing list