[clang] [Clang] allow `` `@$ `` in raw string delimiters in C++26 (PR #93216)
via cfe-commits
cfe-commits at lists.llvm.org
Sat May 25 01:58:55 PDT 2024
https://github.com/cor3ntin updated https://github.com/llvm/llvm-project/pull/93216
>From 556c622275c630b74c0f9000c5c599ff665595e1 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Thu, 23 May 2024 18:45:58 +0200
Subject: [PATCH 1/2] [Clang] allow `` `@$ `` in raw string delimiters in C++26
And as an extension in older language modes.
Per https://eel.is/c++draft/lex.string#nt:d-char
Fixes #93130
---
clang/docs/ReleaseNotes.rst | 1 +
clang/include/clang/Basic/CharInfo.h | 15 +++++++-------
.../include/clang/Basic/DiagnosticLexKinds.td | 8 ++++++++
clang/lib/Basic/CharInfo.cpp | 20 +++++++++----------
clang/lib/Lex/Lexer.cpp | 11 +++++++++-
clang/test/Lexer/cxx2c-raw-strings.cpp | 12 +++++++++++
6 files changed, 49 insertions(+), 18 deletions(-)
create mode 100644 clang/test/Lexer/cxx2c-raw-strings.cpp
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7bcdee96e213e..2e298cd9cdb82 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -771,6 +771,7 @@ Bug Fixes to C++ Support
Fixes (#GH87210), (GH89541).
- Clang no longer tries to check if an expression is immediate-escalating in an unevaluated context.
Fixes (#GH91308).
+- Clang now allows ``@$``` in raw string literal delimiters. Fixes (#GH93130).
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h
index d807955311828..4d90528f7992e 100644
--- a/clang/include/clang/Basic/CharInfo.h
+++ b/clang/include/clang/Basic/CharInfo.h
@@ -28,8 +28,7 @@ namespace charinfo {
CHAR_LOWER = 0x0040, // a-z
CHAR_UNDER = 0x0080, // _
CHAR_PERIOD = 0x0100, // .
- CHAR_RAWDEL = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"'
- CHAR_PUNCT = 0x0400 // `$@()
+ CHAR_PUNCT = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"'`$@()
};
enum {
@@ -152,7 +151,8 @@ LLVM_READONLY inline bool isHexDigit(unsigned char c) {
/// Note that '_' is both a punctuation character and an identifier character!
LLVM_READONLY inline bool isPunctuation(unsigned char c) {
using namespace charinfo;
- return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0;
+ return (InfoTable[c] &
+ (CHAR_UNDER | CHAR_PERIOD | CHAR_PUNCT | CHAR_PUNCT)) != 0;
}
/// Return true if this character is an ASCII printable character; that is, a
@@ -160,8 +160,8 @@ LLVM_READONLY inline bool isPunctuation(unsigned char c) {
/// terminal.
LLVM_READONLY inline bool isPrintable(unsigned char c) {
using namespace charinfo;
- return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT|
- CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0;
+ return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_PUNCT |
+ CHAR_DIGIT | CHAR_UNDER | CHAR_SPACE)) != 0;
}
/// Return true if this is the body character of a C preprocessing number,
@@ -175,8 +175,9 @@ LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) {
/// Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) {
using namespace charinfo;
- return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|
- CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0;
+ return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_DIGIT |
+ CHAR_UNDER | CHAR_PUNCT)) != 0 &&
+ c != '(' && c != ')';
}
enum class EscapeChar {
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index ad6bacfb118d4..8411842490c4e 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -111,6 +111,14 @@ def warn_cxx98_compat_raw_string_literal : Warning<
"raw string literals are incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
+def warn_cxx26_compat_raw_string_literal_character_set : Warning<
+ "'%0'in a raw string literal delimiter is incompatible "
+ "with standards before C++2c">,
+ InGroup<CXXPre26Compat>, DefaultIgnore;
+def ext_cxx26_raw_string_literal_character_set : Extension<
+ "'%0'in a raw string literal delimiter is a C++2c extension">,
+ InGroup<CXX26>, DefaultIgnore;
+
def warn_multichar_character_literal : Warning<
"multi-character character constant">, InGroup<MultiChar>;
def warn_four_char_character_literal : Warning<
diff --git a/clang/lib/Basic/CharInfo.cpp b/clang/lib/Basic/CharInfo.cpp
index d02054c9718f5..26d693b8e9b94 100644
--- a/clang/lib/Basic/CharInfo.cpp
+++ b/clang/lib/Basic/CharInfo.cpp
@@ -31,20 +31,20 @@ const uint16_t clang::charinfo::InfoTable[256] = {
0 , 0 , 0 , 0 ,
//32 SP 33 ! 34 " 35 #
//36 $ 37 % 38 & 39 '
- CHAR_SPACE , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
- CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
+ CHAR_SPACE , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT ,
+ CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT ,
//40 ( 41 ) 42 * 43 +
//44 , 45 - 46 . 47 /
- CHAR_PUNCT , CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL ,
- CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
+ CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT ,
+ CHAR_PUNCT , CHAR_PUNCT , CHAR_PERIOD , CHAR_PUNCT ,
//48 0 49 1 50 2 51 3
//52 4 53 5 54 6 55 7
CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT ,
CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT ,
//56 8 57 9 58 : 59 ;
//60 < 61 = 62 > 63 ?
- CHAR_DIGIT , CHAR_DIGIT , CHAR_RAWDEL , CHAR_RAWDEL ,
- CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
+ CHAR_DIGIT , CHAR_DIGIT , CHAR_PUNCT , CHAR_PUNCT ,
+ CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT ,
//64 @ 65 A 66 B 67 C
//68 D 69 E 70 F 71 G
CHAR_PUNCT , CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER ,
@@ -59,8 +59,8 @@ const uint16_t clang::charinfo::InfoTable[256] = {
CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
//88 X 89 Y 90 Z 91 [
//92 \ 93 ] 94 ^ 95 _
- CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_RAWDEL ,
- CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER ,
+ CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_PUNCT ,
+ CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_UNDER ,
//96 ` 97 a 98 b 99 c
//100 d 101 e 102 f 103 g
CHAR_PUNCT , CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER ,
@@ -75,6 +75,6 @@ const uint16_t clang::charinfo::InfoTable[256] = {
CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
//120 x 121 y 122 z 123 {
//124 | 125 } 126 ~ 127 DEL
- CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_RAWDEL ,
- CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
+ CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_PUNCT ,
+ CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , 0
};
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index c98645993abe0..c7543a48c0b50 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2261,8 +2261,17 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
unsigned PrefixLen = 0;
- while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen]))
+ while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) {
++PrefixLen;
+ if (!isLexingRawMode() &&
+ llvm::is_contained({'$', '@', '`'}, CurPtr[PrefixLen])) {
+ const char *Pos = &CurPtr[PrefixLen];
+ Diag(Pos, LangOpts.CPlusPlus26
+ ? diag::warn_cxx26_compat_raw_string_literal_character_set
+ : diag::ext_cxx26_raw_string_literal_character_set)
+ << StringRef(Pos, 1);
+ }
+ }
// If the last character was not a '(', then we didn't lex a valid delimiter.
if (CurPtr[PrefixLen] != '(') {
diff --git a/clang/test/Lexer/cxx2c-raw-strings.cpp b/clang/test/Lexer/cxx2c-raw-strings.cpp
new file mode 100644
index 0000000000000..9181cbc7cf8d4
--- /dev/null
+++ b/clang/test/Lexer/cxx2c-raw-strings.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wc++26-extensions %s
+// RUN: %clang_cc1 -std=c++2c -fsyntax-only -verify=cxx26 -Wpre-c++26-compat %s
+
+int main() {
+ (void) R"abc`@$(foobar)abc`@$";
+ //expected-warning at -1 {{'`'in a raw string literal delimiter is a C++2c extension}}
+ //expected-warning at -2 {{'@'in a raw string literal delimiter is a C++2c extension}}
+ //expected-warning at -3 {{'$'in a raw string literal delimiter is a C++2c extension}}
+ //cxx26-warning at -4 {{'`'in a raw string literal delimiter is incompatible with standards before C++2c}}
+ //cxx26-warning at -5 {{'@'in a raw string literal delimiter is incompatible with standards before C++2c}}
+ //cxx26-warning at -6 {{'$'in a raw string literal delimiter is incompatible with standards before C++2c}}
+}
>From 8b3678b9451105bddee430c10d6b54626cee034d Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot at gmail.com>
Date: Fri, 24 May 2024 08:26:03 +0200
Subject: [PATCH 2/2] add space
---
clang/include/clang/Basic/DiagnosticLexKinds.td | 4 ++--
clang/test/Lexer/cxx2c-raw-strings.cpp | 12 ++++++------
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 8411842490c4e..e10ffbabd1da6 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -112,11 +112,11 @@ def warn_cxx98_compat_raw_string_literal : Warning<
InGroup<CXX98Compat>, DefaultIgnore;
def warn_cxx26_compat_raw_string_literal_character_set : Warning<
- "'%0'in a raw string literal delimiter is incompatible "
+ "'%0' in a raw string literal delimiter is incompatible "
"with standards before C++2c">,
InGroup<CXXPre26Compat>, DefaultIgnore;
def ext_cxx26_raw_string_literal_character_set : Extension<
- "'%0'in a raw string literal delimiter is a C++2c extension">,
+ "'%0' in a raw string literal delimiter is a C++2c extension">,
InGroup<CXX26>, DefaultIgnore;
def warn_multichar_character_literal : Warning<
diff --git a/clang/test/Lexer/cxx2c-raw-strings.cpp b/clang/test/Lexer/cxx2c-raw-strings.cpp
index 9181cbc7cf8d4..569a4b8447e57 100644
--- a/clang/test/Lexer/cxx2c-raw-strings.cpp
+++ b/clang/test/Lexer/cxx2c-raw-strings.cpp
@@ -3,10 +3,10 @@
int main() {
(void) R"abc`@$(foobar)abc`@$";
- //expected-warning at -1 {{'`'in a raw string literal delimiter is a C++2c extension}}
- //expected-warning at -2 {{'@'in a raw string literal delimiter is a C++2c extension}}
- //expected-warning at -3 {{'$'in a raw string literal delimiter is a C++2c extension}}
- //cxx26-warning at -4 {{'`'in a raw string literal delimiter is incompatible with standards before C++2c}}
- //cxx26-warning at -5 {{'@'in a raw string literal delimiter is incompatible with standards before C++2c}}
- //cxx26-warning at -6 {{'$'in a raw string literal delimiter is incompatible with standards before C++2c}}
+ //expected-warning at -1 {{'`' in a raw string literal delimiter is a C++2c extension}}
+ //expected-warning at -2 {{'@' in a raw string literal delimiter is a C++2c extension}}
+ //expected-warning at -3 {{'$' in a raw string literal delimiter is a C++2c extension}}
+ //cxx26-warning at -4 {{'`' in a raw string literal delimiter is incompatible with standards before C++2c}}
+ //cxx26-warning at -5 {{'@' in a raw string literal delimiter is incompatible with standards before C++2c}}
+ //cxx26-warning at -6 {{'$' in a raw string literal delimiter is incompatible with standards before C++2c}}
}
More information about the cfe-commits
mailing list