[clang] e464684 - [Clang] Allow raw string literals in C as an extension (#88265)

via cfe-commits cfe-commits at lists.llvm.org
Wed Jul 10 03:10:48 PDT 2024


Author: Sirraide
Date: 2024-07-10T12:10:44+02:00
New Revision: e46468407a7bb7f8b2fe13675a5a1c32b85f8cad

URL: https://github.com/llvm/llvm-project/commit/e46468407a7bb7f8b2fe13675a5a1c32b85f8cad
DIFF: https://github.com/llvm/llvm-project/commit/e46468407a7bb7f8b2fe13675a5a1c32b85f8cad.diff

LOG: [Clang] Allow raw string literals in C as an extension (#88265)

This enables raw R"" string literals in C in some language modes
and adds an option to disable or enable them explicitly as an
extension.

Background: GCC supports raw string literals in C in `-gnuXY` modes
starting with gnu99. This pr both enables raw string literals in gnu99 
mode and later in C and adds an `-f[no-]raw-string-literals` flag to override 
this behaviour. The decision not to enable raw string literals in gnu89
mode, according to the GCC devs, is intentional as that mode is supposed
to be used for ‘old code’ that they don’t want to break; we’ve decided to
match GCC’s behaviour here as well.

The `-fraw-string-literals`  flag can additionally be used to enable raw string 
literals in modes where they aren’t enabled by default (such as c99—as 
opposed to gnu99—or even e.g. C++03); conversely, the negated flag can 
be used to disable them in any gnuXY modes that *do* provide them by 
default, or to override a previous flag. However, we do *not*  support 
disabling raw string literals (or indeed either of these two options) in 
C++11 mode and later, because we don’t want to just start supporting 
disabling features that are actually part of the language in the general case.

This fixes #85703.

Added: 
    clang/test/Driver/fraw-string-literals-cxx.cpp
    clang/test/Lexer/raw-string-ext.c

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/DiagnosticDriverKinds.td
    clang/include/clang/Basic/LangOptions.def
    clang/include/clang/Basic/LangStandard.h
    clang/include/clang/Driver/Options.td
    clang/lib/Basic/LangOptions.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Frontend/CompilerInvocation.cpp
    clang/lib/Lex/DependencyDirectivesScanner.cpp
    clang/lib/Lex/Lexer.cpp
    clang/unittests/Lex/DependencyDirectivesScannerTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index cd3a4c2b1be1a..ebaa8773a262f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -43,6 +43,10 @@ code bases.
 C/C++ Language Potentially Breaking Changes
 -------------------------------------------
 
+- Clang now supports raw string literals in ``-std=gnuXY`` mode as an extension in
+  C99 and later. This behaviour can also be overridden using ``-f[no-]raw-string-literals``.
+  Support of raw string literals in C++ is not affected. Fixes (#GH85703).
+
 C++ Specific Potentially Breaking Changes
 -----------------------------------------
 - Clang now diagnoses function/variable templates that shadow their own template parameters, e.g. ``template<class T> void T();``.

diff  --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 243d88d53d664..359c0de7f811c 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -375,6 +375,9 @@ def err_drv_negative_columns : Error<
   "invalid value '%1' in '%0', value must be 'none' or a positive integer">;
 def err_drv_small_columns : Error<
   "invalid value '%1' in '%0', value must be '%2' or greater">;
+def warn_drv_fraw_string_literals_in_cxx11 : Warning<
+  "ignoring '-f%select{no-|}0raw-string-literals', which is only valid for C and C++ standards before C++11">,
+  InGroup<UnusedCommandLineArgument>;
 
 def err_drv_invalid_malign_branch_EQ : Error<
   "invalid argument '%0' to -malign-branch=; each element must be one of: %1">;

diff  --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 491759e2fcdbb..d806b917432ad 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -465,6 +465,8 @@ LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type")
 
 LANGOPT(CXXAssumptions, 1, 1, "Enable or disable codegen and compile-time checks for C++23's [[assume]] attribute")
 
+LANGOPT(RawStringLiterals, 1, 1, "Enable or disable raw string literals")
+
 ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2,
              StrictFlexArraysLevelKind::Default,
              "Rely on strict definition of flexible arrays")

diff  --git a/clang/include/clang/Basic/LangStandard.h b/clang/include/clang/Basic/LangStandard.h
index f79b4aafb0b26..56a0d2c95e2b1 100644
--- a/clang/include/clang/Basic/LangStandard.h
+++ b/clang/include/clang/Basic/LangStandard.h
@@ -134,6 +134,13 @@ struct LangStandard {
   /// hasDigraphs - Language supports digraphs.
   bool hasDigraphs() const { return Flags & Digraphs; }
 
+  /// hasRawStringLiterals - Language supports R"()" raw string literals.
+  bool hasRawStringLiterals() const {
+    // GCC supports raw string literals in C99 and later, but not in C++
+    // before C++11.
+    return isCPlusPlus11() || (!isCPlusPlus() && isC99() && isGNUMode());
+  }
+
   /// isGNUMode - Language includes GNU extensions.
   bool isGNUMode() const { return Flags & GNUMode; }
 

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index be7c3b60c20f1..fae865571001c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4240,6 +4240,12 @@ def fenable_matrix : Flag<["-"], "fenable-matrix">, Group<f_Group>,
     HelpText<"Enable matrix data type and related builtin functions">,
     MarshallingInfoFlag<LangOpts<"MatrixTypes">>;
 
+defm raw_string_literals : BoolFOption<"raw-string-literals",
+    LangOpts<"RawStringLiterals">, Default<std#".hasRawStringLiterals()">,
+    PosFlag<SetTrue, [], [], "Enable">,
+    NegFlag<SetFalse, [], [], "Disable">,
+    BothFlags<[], [ClangOption, CC1Option], " raw string literals">>;
+
 def fzero_call_used_regs_EQ
     : Joined<["-"], "fzero-call-used-regs=">, Group<f_Group>,
     Visibility<[ClangOption, CC1Option]>,

diff  --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp
index 61072b7b81bff..e5adc034f60c1 100644
--- a/clang/lib/Basic/LangOptions.cpp
+++ b/clang/lib/Basic/LangOptions.cpp
@@ -125,6 +125,7 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang,
   Opts.HexFloats = Std.hasHexFloats();
   Opts.WChar = Std.isCPlusPlus();
   Opts.Digraphs = Std.hasDigraphs();
+  Opts.RawStringLiterals = Std.hasRawStringLiterals();
 
   Opts.HLSL = Lang == Language::HLSL;
   if (Opts.HLSL && Opts.IncludeDefaultHeader)

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 6ba0d56aff33c..edb3c20ec9768 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6519,6 +6519,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions);
   Args.AddLastArg(CmdArgs, options::OPT_fdigraphs, options::OPT_fno_digraphs);
   Args.AddLastArg(CmdArgs, options::OPT_fzero_call_used_regs_EQ);
+  Args.AddLastArg(CmdArgs, options::OPT_fraw_string_literals,
+                  options::OPT_fno_raw_string_literals);
 
   if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls,
                    Triple.hasDefaultEmulatedTLS()))

diff  --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index f42e28ba7e629..0082c15aac7e4 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -610,6 +610,19 @@ static bool FixupInvocation(CompilerInvocation &Invocation,
     LangOpts.NewAlignOverride = 0;
   }
 
+  // The -f[no-]raw-string-literals option is only valid in C and in C++
+  // standards before C++11.
+  if (LangOpts.CPlusPlus11) {
+    if (Args.hasArg(OPT_fraw_string_literals, OPT_fno_raw_string_literals)) {
+      Args.claimAllArgs(OPT_fraw_string_literals, OPT_fno_raw_string_literals);
+      Diags.Report(diag::warn_drv_fraw_string_literals_in_cxx11)
+          << bool(LangOpts.RawStringLiterals);
+    }
+
+    // Do not allow disabling raw string literals in C++11 or later.
+    LangOpts.RawStringLiterals = true;
+  }
+
   // Prevent the user from specifying both -fsycl-is-device and -fsycl-is-host.
   if (LangOpts.SYCLIsDevice && LangOpts.SYCLIsHost)
     Diags.Report(diag::err_drv_argument_not_allowed_with) << "-fsycl-is-device"

diff  --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 0971daa1f3666..57652be8244b4 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -73,8 +73,8 @@ struct Scanner {
     // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
     LangOpts.ObjC = true;
     LangOpts.LineComment = true;
-    // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and
-    // R"()" literals.
+    LangOpts.RawStringLiterals = true;
+    // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"".
     return LangOpts;
   }
 

diff  --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index e59c7805b3862..ef1e1f4bd9aeb 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3876,7 +3876,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
                                tok::utf16_char_constant);
 
       // UTF-16 raw string literal
-      if (Char == 'R' && LangOpts.CPlusPlus11 &&
+      if (Char == 'R' && LangOpts.RawStringLiterals &&
           getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
         return LexRawStringLiteral(Result,
                                ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
@@ -3898,7 +3898,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
                                   SizeTmp2, Result),
               tok::utf8_char_constant);
 
-        if (Char2 == 'R' && LangOpts.CPlusPlus11) {
+        if (Char2 == 'R' && LangOpts.RawStringLiterals) {
           unsigned SizeTmp3;
           char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
           // UTF-8 raw string literal
@@ -3934,7 +3934,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
                                tok::utf32_char_constant);
 
       // UTF-32 raw string literal
-      if (Char == 'R' && LangOpts.CPlusPlus11 &&
+      if (Char == 'R' && LangOpts.RawStringLiterals &&
           getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
         return LexRawStringLiteral(Result,
                                ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
@@ -3949,7 +3949,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
 
-    if (LangOpts.CPlusPlus11) {
+    if (LangOpts.RawStringLiterals) {
       Char = getCharAndSize(CurPtr, SizeTmp);
 
       if (Char == '"')
@@ -3972,7 +3972,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
                               tok::wide_string_literal);
 
     // Wide raw string literal.
-    if (LangOpts.CPlusPlus11 && Char == 'R' &&
+    if (LangOpts.RawStringLiterals && Char == 'R' &&
         getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
       return LexRawStringLiteral(Result,
                                ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),

diff  --git a/clang/test/Driver/fraw-string-literals-cxx.cpp b/clang/test/Driver/fraw-string-literals-cxx.cpp
new file mode 100644
index 0000000000000..f831bf5344e63
--- /dev/null
+++ b/clang/test/Driver/fraw-string-literals-cxx.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang -fraw-string-literals    -fsyntax-only -std=c++03   %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
+// RUN: %clang -fraw-string-literals    -fsyntax-only -std=gnu++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
+// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++03   %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
+// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++03 %s 2>&1 | FileCheck --check-prefix=CHECK-PRE-CXX11 --allow-empty %s
+// RUN: %clang -fraw-string-literals    -fsyntax-only -std=c++11   %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
+// RUN: %clang -fraw-string-literals    -fsyntax-only -std=gnu++11 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
+// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++11   %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
+// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++11 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
+// RUN: %clang -fraw-string-literals    -fsyntax-only -std=c++20   %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
+// RUN: %clang -fraw-string-literals    -fsyntax-only -std=gnu++20 %s 2>&1 | FileCheck --check-prefix=CHECK-POS %s
+// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=c++20   %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
+// RUN: %clang -fno-raw-string-literals -fsyntax-only -std=gnu++20 %s 2>&1 | FileCheck --check-prefix=CHECK-NEG %s
+
+// CHECK-PRE-CXX11-NOT: ignoring '-fraw-string-literals'
+// CHECK-PRE-CXX11-NOT: ignoring '-fno-raw-string-literals'
+// CHECK-POS: ignoring '-fraw-string-literals', which is only valid for C and C++ standards before C++11
+// CHECK-NEG: ignoring '-fno-raw-string-literals', which is only valid for C and C++ standards before C++11

diff  --git a/clang/test/Lexer/raw-string-ext.c b/clang/test/Lexer/raw-string-ext.c
new file mode 100644
index 0000000000000..de318b616df70
--- /dev/null
+++ b/clang/test/Lexer/raw-string-ext.c
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -fsyntax-only -std=gnu11 -verify=supported %s
+// RUN: %clang_cc1 -fsyntax-only -std=c11 -DUNICODE -fraw-string-literals -verify=supported %s
+// RUN: %clang_cc1 -fsyntax-only -std=gnu89 -verify=unsupported %s
+// RUN: %clang_cc1 -fsyntax-only -std=c11 -DUNICODE -verify=unsupported %s
+// RUN: %clang_cc1 -fsyntax-only -std=gnu11 -DUNICODE -fno-raw-string-literals -verify=unsupported %s
+
+// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++03 -verify=unsupported,cxx-unsupported %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++03 -verify=unsupported,cxx-unsupported %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++03 -fraw-string-literals -verify=supported %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++03 -fraw-string-literals -verify=supported %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -verify=supported,cxx %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -verify=supported,cxx %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -fraw-string-literals -verify=supported,yes %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -fraw-string-literals -verify=supported,yes %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=c++11 -DUNICODE -fno-raw-string-literals -verify=supported,no %s
+// RUN: %clang_cc1 -x c++ -fsyntax-only -Wno-unused -std=gnu++11 -DUNICODE -fno-raw-string-literals -verify=supported,no %s
+
+// GCC supports raw string literals in C99 and later in '-std=gnuXY' mode; we
+// additionally provide '-f[no-]raw-string-literals' to enable/disable them
+// explicitly in C.
+//
+// We do not allow disabling raw string literals in C++ mode if they’re enabled
+// by the language standard, i.e. in C++11 or later.
+
+// Driver warnings.
+// yes-warning@* {{ignoring '-fraw-string-literals'}}
+// no-warning@* {{ignoring '-fno-raw-string-literals'}}
+
+void f() {
+  (void) R"foo()foo"; // unsupported-error {{use of undeclared identifier 'R'}} cxx-unsupported-error {{expected ';' after expression}}
+  (void) LR"foo()foo"; // unsupported-error {{use of undeclared identifier 'LR'}} cxx-unsupported-error {{expected ';' after expression}}
+
+#ifdef UNICODE
+  (void) uR"foo()foo"; // unsupported-error {{use of undeclared identifier 'uR'}} cxx-unsupported-error {{expected ';' after expression}}
+  (void) u8R"foo()foo"; // unsupported-error {{use of undeclared identifier 'u8R'}} cxx-unsupported-error {{expected ';' after expression}}
+  (void) UR"foo()foo"; // unsupported-error {{use of undeclared identifier 'UR'}} cxx-unsupported-error {{expected ';' after expression}}
+#endif
+}
+
+// supported-error@* {{missing terminating delimiter}}
+// supported-error@* {{expected expression}}
+// supported-error@* {{expected ';' after top level declarator}}
+#define R "bar"
+const char* s =  R"foo(";

diff  --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index 59fef9ecbb9c9..94af9688a96e2 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -583,10 +583,12 @@ TEST(MinimizeSourceToDependencyDirectivesTest, UnderscorePragma) {
       R"(_Pragma(u"clang module import"))", Out));
   EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
 
-  // FIXME: R"()" strings depend on using C++11 language mode
+  // R"()" strings are enabled by default.
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       R"(_Pragma(R"abc(clang module import)abc"))", Out));
-  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
+  EXPECT_STREQ(R"(_Pragma(R"abc(clang module import)abc"))"
+               "\n",
+               Out.data());
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, Include) {


        


More information about the cfe-commits mailing list