[clang-tools-extra] 95f5096 - Implement P2361 Unevaluated string literals

Corentin Jabot via cfe-commits cfe-commits at lists.llvm.org
Fri Jul 7 04:30:33 PDT 2023


Author: Corentin Jabot
Date: 2023-07-07T13:30:27+02:00
New Revision: 95f50964fbf59dc6394898c0719ce8772cc24d25

URL: https://github.com/llvm/llvm-project/commit/95f50964fbf59dc6394898c0719ce8772cc24d25
DIFF: https://github.com/llvm/llvm-project/commit/95f50964fbf59dc6394898c0719ce8772cc24d25.diff

LOG: Implement P2361 Unevaluated string literals

This patch proposes to handle in an uniform fashion
the parsing of strings that are never evaluated,
in asm statement, static assert, attrributes, extern,
etc.

Unevaluated strings are UTF-8 internally and so currently
behave as narrow strings, but these things will diverge with
D93031.

The big question both for this patch and the P2361 paper
is whether we risk breaking code by disallowing
encoding prefixes in this context.
I hope this patch may allow to gather some data on that.

Future work:
Improve the rendering of unicode characters, line break
and so forth in static-assert messages

Reviewed By: aaron.ballman, shafik

Differential Revision: https://reviews.llvm.org/D105759

Added: 
    

Modified: 
    clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp
    clang/docs/ReleaseNotes.rst
    clang/include/clang/AST/Expr.h
    clang/include/clang/Basic/DiagnosticLexKinds.td
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/include/clang/Lex/LiteralSupport.h
    clang/include/clang/Parse/Parser.h
    clang/include/clang/Sema/Sema.h
    clang/lib/AST/Expr.cpp
    clang/lib/Lex/LiteralSupport.cpp
    clang/lib/Lex/PPMacroExpansion.cpp
    clang/lib/Lex/Pragma.cpp
    clang/lib/Parse/ParseDeclCXX.cpp
    clang/lib/Parse/ParseExpr.cpp
    clang/lib/Sema/SemaDeclCXX.cpp
    clang/lib/Sema/SemaExpr.cpp
    clang/lib/Sema/SemaExprCXX.cpp
    clang/lib/Sema/SemaInit.cpp
    clang/test/CXX/dcl.dcl/dcl.link/p2.cpp
    clang/test/CXX/dcl.dcl/p4-0x.cpp
    clang/test/FixIt/fixit-static-assert.cpp
    clang/test/SemaCXX/static-assert.cpp
    clang/www/cxx_status.html

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp
index 97ad94834cd0e6..74b4209505f97a 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/unary-static-assert.cpp
@@ -7,9 +7,6 @@ void f_textless(int a) {
   static_assert(sizeof(a) <= 10, "");
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when the string literal is an empty string [modernize-unary-static-assert]
   // CHECK-FIXES: {{^}}  static_assert(sizeof(a) <= 10 );{{$}}
-  static_assert(sizeof(a) <= 12, L"");
-  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use unary 'static_assert' when
-  // CHECK-FIXES: {{^}}  static_assert(sizeof(a) <= 12 );{{$}}
   FOO
   // CHECK-FIXES: {{^}}  FOO{{$}}
   static_assert(sizeof(a) <= 17, MSG);

diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 79550f79cc36b5..8672b483cf836b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -135,6 +135,8 @@ C++2c Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
 - Compiler flags ``-std=c++2c`` and ``-std=gnu++2c`` have been added for experimental C++2c implementation work.
 - Implemented `P2738R1: constexpr cast from void* <https://wg21.link/P2738R1>`_.
+- Partially implemented `P2361R6: constexpr cast from void* <https://wg21.link/P2361R6>`_.
+  The changes to attributes declarations are not part of this release.
 
 Resolutions to C++ Defect Reports
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

diff  --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 34aa63658d5017..661a8a7175ca88 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -1804,7 +1804,7 @@ class StringLiteral final
   /// * An array of getByteLength() char used to store the string data.
 
 public:
-  enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32 };
+  enum StringKind { Ordinary, Wide, UTF8, UTF16, UTF32, Unevaluated };
 
 private:
   unsigned numTrailingObjects(OverloadToken<unsigned>) const { return 1; }
@@ -1866,7 +1866,7 @@ class StringLiteral final
                                     unsigned CharByteWidth);
 
   StringRef getString() const {
-    assert(getCharByteWidth() == 1 &&
+    assert((isUnevaluated() || getCharByteWidth() == 1) &&
            "This function is used in places that assume strings use char");
     return StringRef(getStrDataAsChar(), getByteLength());
   }
@@ -1906,6 +1906,7 @@ class StringLiteral final
   bool isUTF8() const { return getKind() == UTF8; }
   bool isUTF16() const { return getKind() == UTF16; }
   bool isUTF32() const { return getKind() == UTF32; }
+  bool isUnevaluated() const { return getKind() == Unevaluated; }
   bool isPascal() const { return StringLiteralBits.IsPascal; }
 
   bool containsNonAscii() const {

diff  --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index ef5b1d8be0fd5c..b9dfb76cca4a1a 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -276,6 +276,13 @@ def ext_ms_reserved_user_defined_literal : ExtWarn<
   "identifier">, InGroup<ReservedUserDefinedLiteral>;
 def err_unsupported_string_concat : Error<
   "unsupported non-standard concatenation of string literals">;
+
+def err_unevaluated_string_prefix : Error<
+  "an unevaluated string literal cannot have an encoding prefix">;
+def err_unevaluated_string_udl : Error<
+  "an unevaluated string literal cannot be a user-defined literal">;
+def err_unevaluated_string_invalid_escape_sequence : Error<
+  "invalid escape sequence '%0' in an unevaluated string literal">;
 def err_string_concat_mixed_suffix : Error<
   "
diff ering user-defined suffixes ('%0' and '%1') in string literal "
   "concatenation">;

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index edfa30abf594a2..7ca91020181845 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -433,9 +433,6 @@ def err_ellipsis_first_param : Error<
   "ISO C requires a named parameter before '...'">;
 def err_declarator_need_ident : Error<"declarator requires an identifier">;
 def err_language_linkage_spec_unknown : Error<"unknown linkage language">;
-def err_language_linkage_spec_not_ascii : Error<
-  "string literal in language linkage specifier cannot have an "
-  "encoding-prefix">;
 def ext_use_out_of_scope_declaration : ExtWarn<
   "use of out-of-scope declaration of %0%select{| whose type is not "
   "compatible with that of an implicit declaration}1">,

diff  --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index 313af292af3722..0a45f32326f4d1 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -212,6 +212,11 @@ class CharLiteralParser {
   }
 };
 
+enum class StringLiteralEvalMethod {
+  Evaluated,
+  Unevaluated,
+};
+
 /// StringLiteralParser - This decodes string escape characters and performs
 /// wide string analysis and Translation Phase #6 (concatenation of string
 /// literals) (C99 5.1.1.2p1).
@@ -230,20 +235,23 @@ class StringLiteralParser {
   SmallString<32> UDSuffixBuf;
   unsigned UDSuffixToken;
   unsigned UDSuffixOffset;
+  StringLiteralEvalMethod EvalMethod;
+
 public:
-  StringLiteralParser(ArrayRef<Token> StringToks,
-                      Preprocessor &PP);
-  StringLiteralParser(ArrayRef<Token> StringToks,
-                      const SourceManager &sm, const LangOptions &features,
-                      const TargetInfo &target,
+  StringLiteralParser(ArrayRef<Token> StringToks, Preprocessor &PP,
+                      StringLiteralEvalMethod StringMethod =
+                          StringLiteralEvalMethod::Evaluated);
+  StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm,
+                      const LangOptions &features, const TargetInfo &target,
                       DiagnosticsEngine *diags = nullptr)
-    : SM(sm), Features(features), Target(target), Diags(diags),
-      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
-      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
+      : SM(sm), Features(features), Target(target), Diags(diags),
+        MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+        ResultPtr(ResultBuf.data()),
+        EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false),
+        Pascal(false) {
     init(StringToks);
   }
 
-
   bool hadError;
   bool Pascal;
 
@@ -269,6 +277,9 @@ class StringLiteralParser {
   bool isUTF16() const { return Kind == tok::utf16_string_literal; }
   bool isUTF32() const { return Kind == tok::utf32_string_literal; }
   bool isPascal() const { return Pascal; }
+  bool isUnevaluated() const {
+    return EvalMethod == StringLiteralEvalMethod::Unevaluated;
+  }
 
   StringRef getUDSuffix() const { return UDSuffixBuf; }
 

diff  --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 210fbb183dc07e..077ebf4e5d8656 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -1788,8 +1788,12 @@ class Parser : public CodeCompletionHandler {
                                   bool IsUnevaluated);
 
   ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral = false);
+  ExprResult ParseUnevaluatedStringLiteralExpression();
 
 private:
+  ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral,
+                                          bool Unevaluated);
+
   ExprResult ParseExpressionWithLeadingAt(SourceLocation AtLoc);
 
   ExprResult ParseExpressionWithLeadingExtension(SourceLocation ExtLoc);

diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index aa4fc8947cbe7d..6f413d1c9a6fa3 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5703,6 +5703,8 @@ class Sema final {
   ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks,
                                 Scope *UDLScope = nullptr);
 
+  ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks);
+
   /// ControllingExprOrType is either an opaque pointer coming out of a
   /// ParsedType or an Expr *. FIXME: it'd be better to split this interface
   /// into two so we don't take a void *, but that's awkward because one of

diff  --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 0d927d083ed205..dc5f193d37f0a5 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1136,6 +1136,8 @@ unsigned StringLiteral::mapCharByteWidth(TargetInfo const &Target,
   case UTF32:
     CharByteWidth = Target.getChar32Width();
     break;
+  case Unevaluated:
+    return sizeof(char); // Host;
   }
   assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
   CharByteWidth /= 8;
@@ -1149,35 +1151,45 @@ StringLiteral::StringLiteral(const ASTContext &Ctx, StringRef Str,
                              const SourceLocation *Loc,
                              unsigned NumConcatenated)
     : Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary) {
-  assert(Ctx.getAsConstantArrayType(Ty) &&
-         "StringLiteral must be of constant array type!");
-  unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
-  unsigned ByteLength = Str.size();
-  assert((ByteLength % CharByteWidth == 0) &&
-         "The size of the data must be a multiple of CharByteWidth!");
-
-  // Avoid the expensive division. The compiler should be able to figure it
-  // out by itself. However as of clang 7, even with the appropriate
-  // llvm_unreachable added just here, it is not able to do so.
-  unsigned Length;
-  switch (CharByteWidth) {
-  case 1:
-    Length = ByteLength;
-    break;
-  case 2:
-    Length = ByteLength / 2;
-    break;
-  case 4:
-    Length = ByteLength / 4;
-    break;
-  default:
-    llvm_unreachable("Unsupported character width!");
-  }
+
+  unsigned Length = Str.size();
 
   StringLiteralBits.Kind = Kind;
-  StringLiteralBits.CharByteWidth = CharByteWidth;
-  StringLiteralBits.IsPascal = Pascal;
   StringLiteralBits.NumConcatenated = NumConcatenated;
+
+  if (Kind != StringKind::Unevaluated) {
+    assert(Ctx.getAsConstantArrayType(Ty) &&
+           "StringLiteral must be of constant array type!");
+    unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
+    unsigned ByteLength = Str.size();
+    assert((ByteLength % CharByteWidth == 0) &&
+           "The size of the data must be a multiple of CharByteWidth!");
+
+    // Avoid the expensive division. The compiler should be able to figure it
+    // out by itself. However as of clang 7, even with the appropriate
+    // llvm_unreachable added just here, it is not able to do so.
+    switch (CharByteWidth) {
+    case 1:
+      Length = ByteLength;
+      break;
+    case 2:
+      Length = ByteLength / 2;
+      break;
+    case 4:
+      Length = ByteLength / 4;
+      break;
+    default:
+      llvm_unreachable("Unsupported character width!");
+    }
+
+    StringLiteralBits.CharByteWidth = CharByteWidth;
+    StringLiteralBits.IsPascal = Pascal;
+  } else {
+    assert(!Pascal && "Can't make an unevaluated Pascal string");
+    StringLiteralBits.CharByteWidth = 1;
+    StringLiteralBits.IsPascal = false;
+  }
+
   *getTrailingObjects<unsigned>() = Length;
 
   // Initialize the trailing array of SourceLocation.
@@ -1186,7 +1198,7 @@ StringLiteral::StringLiteral(const ASTContext &Ctx, StringRef Str,
               NumConcatenated * sizeof(SourceLocation));
 
   // Initialize the trailing array of char holding the string data.
-  std::memcpy(getTrailingObjects<char>(), Str.data(), ByteLength);
+  std::memcpy(getTrailingObjects<char>(), Str.data(), Str.size());
 
   setDependence(ExprDependence::None);
 }
@@ -1223,6 +1235,7 @@ StringLiteral *StringLiteral::CreateEmpty(const ASTContext &Ctx,
 
 void StringLiteral::outputString(raw_ostream &OS) const {
   switch (getKind()) {
+  case Unevaluated:
   case Ordinary:
     break; // no prefix.
   case Wide:  OS << 'L'; break;
@@ -1333,7 +1346,8 @@ StringLiteral::getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
                                  const TargetInfo &Target, unsigned *StartToken,
                                  unsigned *StartTokenByteOffset) const {
   assert((getKind() == StringLiteral::Ordinary ||
-          getKind() == StringLiteral::UTF8) &&
+          getKind() == StringLiteral::UTF8 ||
+          getKind() == StringLiteral::Unevaluated) &&
          "Only narrow string literals are currently supported");
 
   // Loop over all of the tokens in this string until we find the one that

diff  --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index c066e1daa618d0..736d08d7a932d7 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -87,6 +87,24 @@ static DiagnosticBuilder Diag(DiagnosticsEngine *Diags,
     MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
 }
 
+static bool IsEscapeValidInUnevaluatedStringLiteral(char Escape) {
+  switch (Escape) {
+  case '\'':
+  case '"':
+  case '?':
+  case '\\':
+  case 'a':
+  case 'b':
+  case 'f':
+  case 'n':
+  case 'r':
+  case 't':
+  case 'v':
+    return true;
+  }
+  return false;
+}
+
 /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
 /// either a character or a string literal.
 static unsigned ProcessCharEscape(const char *ThisTokBegin,
@@ -94,7 +112,8 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
                                   const char *ThisTokEnd, bool &HadError,
                                   FullSourceLoc Loc, unsigned CharWidth,
                                   DiagnosticsEngine *Diags,
-                                  const LangOptions &Features) {
+                                  const LangOptions &Features,
+                                  StringLiteralEvalMethod EvalMethod) {
   const char *EscapeBegin = ThisTokBuf;
   bool Delimited = false;
   bool EndDelimiterFound = false;
@@ -105,6 +124,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
   // We know that this character can't be off the end of the buffer, because
   // that would have been \", which would not have been the end of string.
   unsigned ResultChar = *ThisTokBuf++;
+  char Escape = ResultChar;
   switch (ResultChar) {
   // These map to themselves.
   case '\\': case '\'': case '"': case '?': break;
@@ -318,6 +338,12 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
     }
   }
 
+  if (EvalMethod == StringLiteralEvalMethod::Unevaluated &&
+      !IsEscapeValidInUnevaluatedStringLiteral(Escape)) {
+    Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+         diag::err_unevaluated_string_invalid_escape_sequence)
+        << StringRef(EscapeBegin, ThisTokBuf - EscapeBegin);
+  }
   return ResultChar;
 }
 
@@ -1727,9 +1753,10 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
     }
     unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
     uint64_t result =
-      ProcessCharEscape(TokBegin, begin, end, HadError,
-                        FullSourceLoc(Loc,PP.getSourceManager()),
-                        CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
+        ProcessCharEscape(TokBegin, begin, end, HadError,
+                          FullSourceLoc(Loc, PP.getSourceManager()), CharWidth,
+                          &PP.getDiagnostics(), PP.getLangOpts(),
+                          StringLiteralEvalMethod::Evaluated);
     *buffer_begin++ = result;
   }
 
@@ -1837,13 +1864,14 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
 ///         hex-digit hex-digit hex-digit hex-digit
 /// \endverbatim
 ///
-StringLiteralParser::
-StringLiteralParser(ArrayRef<Token> StringToks,
-                    Preprocessor &PP)
-  : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
-    Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
-    MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
-    ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
+StringLiteralParser::StringLiteralParser(ArrayRef<Token> StringToks,
+                                         Preprocessor &PP,
+                                         StringLiteralEvalMethod EvalMethod)
+    : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
+      Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
+      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+      ResultPtr(ResultBuf.data()), EvalMethod(EvalMethod), hadError(false),
+      Pascal(false) {
   init(StringToks);
 }
 
@@ -1860,12 +1888,12 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
   assert(!StringToks.empty() && "expected at least one token");
   MaxTokenLength = StringToks[0].getLength();
   assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
-  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
-  Kind = StringToks[0].getKind();
-
+  SizeBound = StringToks[0].getLength() - 2; // -2 for "".
   hadError = false;
 
-  // Implement Translation Phase #6: concatenation of string literals
+  // Determines the kind of string from the prefix
+  Kind = tok::string_literal;
+
   /// (C99 5.1.1.2p1).  The common case is only one string fragment.
   for (const Token &Tok : StringToks) {
     if (Tok.getLength() < 2)
@@ -1882,7 +1910,11 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
 
     // Remember if we see any wide or utf-8/16/32 strings.
     // Also check for illegal concatenations.
-    if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) {
+    if (isUnevaluated() && Tok.getKind() != tok::string_literal) {
+      if (Diags)
+        Diags->Report(Tok.getLocation(), diag::err_unevaluated_string_prefix);
+      hadError = true;
+    } else if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) {
       if (isOrdinary()) {
         Kind = Tok.getKind();
       } else {
@@ -1965,13 +1997,18 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
         // result of a concatenation involving at least one user-defined-string-
         // literal, all the participating user-defined-string-literals shall
         // have the same ud-suffix.
-        if (UDSuffixBuf != UDSuffix) {
+        bool UnevaluatedStringHasUDL = isUnevaluated() && !UDSuffix.empty();
+        if (UDSuffixBuf != UDSuffix || UnevaluatedStringHasUDL) {
           if (Diags) {
             SourceLocation TokLoc = StringToks[i].getLocation();
-            Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
-              << UDSuffixBuf << UDSuffix
-              << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
-              << SourceRange(TokLoc, TokLoc);
+            if (UnevaluatedStringHasUDL) {
+              Diags->Report(TokLoc, diag::err_unevaluated_string_udl)
+                  << SourceRange(TokLoc, TokLoc);
+            } else {
+              Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
+                  << UDSuffixBuf << UDSuffix
+                  << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc);
+            }
           }
           hadError = true;
         }
@@ -2043,8 +2080,9 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
       ++ThisTokBuf; // skip "
 
       // Check if this is a pascal string
-      if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
-          ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
+      if (!isUnevaluated() && Features.PascalStrings &&
+          ThisTokBuf + 1 != ThisTokEnd && ThisTokBuf[0] == '\\' &&
+          ThisTokBuf[1] == 'p') {
 
         // If the \p sequence is found in the first token, we have a pascal string
         // Otherwise, if we already have a pascal string, ignore the first \p
@@ -2080,9 +2118,9 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
         }
         // Otherwise, this is a non-UCN escape character.  Process it.
         unsigned ResultChar =
-          ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
-                            FullSourceLoc(StringToks[i].getLocation(), SM),
-                            CharByteWidth*8, Diags, Features);
+            ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
+                              FullSourceLoc(StringToks[i].getLocation(), SM),
+                              CharByteWidth * 8, Diags, Features, EvalMethod);
 
         if (CharByteWidth == 4) {
           // FIXME: Make the type of the result buffer correct instead of
@@ -2104,6 +2142,8 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
     }
   }
 
+  assert((!Pascal || !isUnevaluated()) &&
+         "Pascal string in unevaluated context");
   if (Pascal) {
     if (CharByteWidth == 4) {
       // FIXME: Make the type of the result buffer correct instead of
@@ -2277,8 +2317,8 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
       ByteNo -= Len;
     } else {
       ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
-                        FullSourceLoc(Tok.getLocation(), SM),
-                        CharByteWidth*8, Diags, Features);
+                        FullSourceLoc(Tok.getLocation(), SM), CharByteWidth * 8,
+                        Diags, Features, StringLiteralEvalMethod::Evaluated);
       --ByteNo;
     }
     assert(!HadError && "This method isn't valid on erroneous strings");

diff  --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index a14fdcda58e028..71d38e59707aa1 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1869,7 +1869,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     if (!Tok.isAnnotation() && Tok.getIdentifierInfo())
       Tok.setKind(tok::identifier);
     else if (Tok.is(tok::string_literal) && !Tok.hasUDSuffix()) {
-      StringLiteralParser Literal(Tok, *this);
+      StringLiteralParser Literal(Tok, *this,
+                                  StringLiteralEvalMethod::Unevaluated);
       if (Literal.hadError)
         return;
 

diff  --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index 929e06d5357137..85543ed3f98751 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp
@@ -1088,7 +1088,8 @@ struct PragmaDebugHandler : public PragmaHandler {
       if (DiagName.is(tok::eod))
         PP.getDiagnostics().dump();
       else if (DiagName.is(tok::string_literal) && !DiagName.hasUDSuffix()) {
-        StringLiteralParser Literal(DiagName, PP);
+        StringLiteralParser Literal(DiagName, PP,
+                                    StringLiteralEvalMethod::Unevaluated);
         if (Literal.hadError)
           return;
         PP.getDiagnostics().dump(Literal.GetString());

diff  --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index eacb4fa2343497..126843d8fd2427 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -350,7 +350,7 @@ Decl *Parser::ParseNamespaceAlias(SourceLocation NamespaceLoc,
 ///
 Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) {
   assert(isTokenStringLiteral() && "Not a string literal!");
-  ExprResult Lang = ParseStringLiteralExpression(false);
+  ExprResult Lang = ParseUnevaluatedStringLiteralExpression();
 
   ParseScope LinkageScope(this, Scope::DeclScope);
   Decl *LinkageSpec =
@@ -1023,7 +1023,7 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd) {
       return nullptr;
     }
 
-    AssertMessage = ParseStringLiteralExpression();
+    AssertMessage = ParseUnevaluatedStringLiteralExpression();
     if (AssertMessage.isInvalid()) {
       SkipMalformedDecl();
       return nullptr;

diff  --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 179657fc1ed1a7..75d04824d8b99e 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -3256,6 +3256,17 @@ Parser::ParseCompoundLiteralExpression(ParsedType Ty,
 ///         string-literal
 /// \verbatim
 ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) {
+  return ParseStringLiteralExpression(AllowUserDefinedLiteral,
+                                      /*Unevaluated=*/false);
+}
+
+ExprResult Parser::ParseUnevaluatedStringLiteralExpression() {
+  return ParseStringLiteralExpression(/*AllowUserDefinedLiteral=*/false,
+                                      /*Unevaluated=*/true);
+}
+
+ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral,
+                                                bool Unevaluated) {
   assert(isTokenStringLiteral() && "Not a string literal!");
 
   // String concat.  Note that keywords like __func__ and __FUNCTION__ are not
@@ -3267,6 +3278,11 @@ ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) {
     ConsumeStringToken();
   } while (isTokenStringLiteral());
 
+  if (Unevaluated) {
+    assert(!AllowUserDefinedLiteral && "UDL are always evaluated");
+    return Actions.ActOnUnevaluatedStringLiteral(StringToks);
+  }
+
   // Pass the set of string tokens, ready for concatenation, to the actions.
   return Actions.ActOnStringLiteral(StringToks,
                                     AllowUserDefinedLiteral ? getCurScope()

diff  --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 1222e4330a6e90..507cfc413392a9 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -16471,11 +16471,7 @@ Decl *Sema::ActOnStartLinkageSpecification(Scope *S, SourceLocation ExternLoc,
                                            Expr *LangStr,
                                            SourceLocation LBraceLoc) {
   StringLiteral *Lit = cast<StringLiteral>(LangStr);
-  if (!Lit->isOrdinary()) {
-    Diag(LangStr->getExprLoc(), diag::err_language_linkage_spec_not_ascii)
-      << LangStr->getSourceRange();
-    return nullptr;
-  }
+  assert(Lit->isUnevaluated() && "Unexpected string literal kind");
 
   StringRef Lang = Lit->getString();
   LinkageSpecDecl::LanguageIDs Language;
@@ -16940,10 +16936,7 @@ Decl *Sema::BuildStaticAssertDeclaration(SourceLocation StaticAssertLoc,
       llvm::raw_svector_ostream Msg(MsgBuffer);
       if (AssertMessage) {
         const auto *MsgStr = cast<StringLiteral>(AssertMessage);
-        if (MsgStr->isOrdinary())
-          Msg << MsgStr->getString();
-        else
-          MsgStr->printPretty(Msg, nullptr, getPrintingPolicy());
+        Msg << MsgStr->getString();
       }
 
       Expr *InnerCond = nullptr;

diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 4d6414eed99cd2..fa1f5e36a6ed40 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -1921,6 +1921,30 @@ static ExprResult BuildCookedLiteralOperatorCall(Sema &S, Scope *Scope,
   return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc);
 }
 
+ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks) {
+  StringLiteralParser Literal(StringToks, PP,
+                              StringLiteralEvalMethod::Unevaluated);
+  if (Literal.hadError)
+    return ExprError();
+
+  SmallVector<SourceLocation, 4> StringTokLocs;
+  for (const Token &Tok : StringToks)
+    StringTokLocs.push_back(Tok.getLocation());
+
+  StringLiteral *Lit = StringLiteral::Create(
+      Context, Literal.GetString(), StringLiteral::Unevaluated, false, {},
+      &StringTokLocs[0], StringTokLocs.size());
+
+  if (!Literal.getUDSuffix().empty()) {
+    SourceLocation UDSuffixLoc =
+        getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()],
+                       Literal.getUDSuffixOffset());
+    return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl));
+  }
+
+  return Lit;
+}
+
 /// ActOnStringLiteral - The specified tokens were lexed as pasted string
 /// fragments (e.g. "foo" "bar" L"baz").  The result string has to handle string
 /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from

diff  --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 4d05a0e0c50749..168a0f354683d8 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -4088,6 +4088,9 @@ Sema::IsStringLiteralToNonConstPointerConversion(Expr *From, QualType ToType) {
             case StringLiteral::Wide:
               return Context.typesAreCompatible(Context.getWideCharType(),
                                                 QualType(ToPointeeType, 0));
+            case StringLiteral::Unevaluated:
+              assert(false && "Unevaluated string literal in expression");
+              break;
           }
         }
       }

diff  --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index f2bbdae3d5f36b..71d59a64fb5767 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -143,6 +143,9 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT,
     if (IsWideCharCompatible(ElemTy, Context))
       return SIF_IncompatWideStringIntoWideChar;
     return SIF_Other;
+  case StringLiteral::Unevaluated:
+    assert(false && "Unevaluated string literal in initialization");
+    break;
   }
 
   llvm_unreachable("missed a StringLiteral kind?");

diff  --git a/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp b/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp
index d1c3bcb8fa037c..206c46f34f05b2 100644
--- a/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp
+++ b/clang/test/CXX/dcl.dcl/dcl.link/p2.cpp
@@ -8,7 +8,7 @@ extern "C" {
 extern "C" plusplus {
 }
 
-extern u8"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}}
-extern L"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}}
-extern u"C++" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}}
-extern U"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}}
+extern u8"C" {}  // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+extern L"C" {}   // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+extern u"C++" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+extern U"C" {}   // expected-error {{an unevaluated string literal cannot have an encoding prefix}}

diff  --git a/clang/test/CXX/dcl.dcl/p4-0x.cpp b/clang/test/CXX/dcl.dcl/p4-0x.cpp
index 925dea9d471f4e..545011822eb7d0 100644
--- a/clang/test/CXX/dcl.dcl/p4-0x.cpp
+++ b/clang/test/CXX/dcl.dcl/p4-0x.cpp
@@ -18,4 +18,7 @@ static_assert(S(false), "not so fast"); // expected-error {{not so fast}}
 static_assert(T(), "");
 static_assert(U(), ""); // expected-error {{ambiguous}}
 
-static_assert(false, L"\x14hi" "!" R"x(")x"); // expected-error {{static assertion failed: L"\024hi!\""}}
+static_assert(false, L"\x14hi" // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \
+                               // expected-error {{invalid escape sequence '\x14' in an unevaluated string literal}}
+                     "!"
+                     R"x(")x");

diff  --git a/clang/test/FixIt/fixit-static-assert.cpp b/clang/test/FixIt/fixit-static-assert.cpp
index ac07df4d1a5813..432b140ee1afa2 100644
--- a/clang/test/FixIt/fixit-static-assert.cpp
+++ b/clang/test/FixIt/fixit-static-assert.cpp
@@ -11,8 +11,6 @@ static_assert(true && "String");
 // String literal prefixes are good.
 static_assert(true && R"(RawString)");
 // CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:","
-static_assert(true && L"RawString");
-// CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:","
 
 static_assert(true);
 // CHECK-DAG: {[[@LINE-1]]:19-[[@LINE-1]]:19}:", \"\""

diff  --git a/clang/test/SemaCXX/static-assert.cpp b/clang/test/SemaCXX/static-assert.cpp
index 83fc4bd256285d..da8b726a3e4b07 100644
--- a/clang/test/SemaCXX/static-assert.cpp
+++ b/clang/test/SemaCXX/static-assert.cpp
@@ -29,13 +29,23 @@ template<typename T> struct S {
 S<char> s1; // expected-note {{in instantiation of template class 'S<char>' requested here}}
 S<int> s2;
 
-static_assert(false, L"\xFFFFFFFF"); // expected-error {{static assertion failed: L"\xFFFFFFFF"}}
-static_assert(false, u"\U000317FF"); // expected-error {{static assertion failed: u"\U000317FF"}}
-
-static_assert(false, u8"Ω"); // expected-error {{static assertion failed: u8"\316\251"}}
-static_assert(false, L"\u1234"); // expected-error {{static assertion failed: L"\x1234"}}
-static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static assertion failed: L"\x1FF""0\x123""fx\xFFFFFgoop"}}
-
+static_assert(false, L"\xFFFFFFFF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \
+                                     // expected-error {{invalid escape sequence '\xFFFFFFFF' in an unevaluated string literal}}
+static_assert(false, u"\U000317FF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+// FIXME: render this as u8"\u03A9"
+static_assert(false, u8"Ω");     // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+static_assert(false, L"\u1234"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+static_assert(false, L"\x1ff"    // expected-error {{an unevaluated string literal cannot have an encoding prefix}} \
+                                 // expected-error {{invalid escape sequence '\x1ff' in an unevaluated string literal}}
+                     "0\x123"    // expected-error {{invalid escape sequence '\x123' in an unevaluated string literal}}
+                     "fx\xfffff" // expected-error {{invalid escape sequence '\xfffff' in an unevaluated string literal}}
+                     "goop");
+
+static_assert(false, "\'\"\?\\\a\b\f\n\r\t\v"); // expected-error {{'"?\<U+0007><U+0008>}}
+static_assert(true, "\xFF"); // expected-error {{invalid escape sequence '\xFF' in an unevaluated string literal}}
+static_assert(true, "\123"); // expected-error {{invalid escape sequence '\123' in an unevaluated string literal}}
+static_assert(true, "\pOh no, a Pascal string!"); // expected-warning {{unknown escape sequence '\p'}} \
+                                                  // expected-error {{invalid escape sequence '\p' in an unevaluated string literal}}
 static_assert(false, R"(a
 \tb
 c

diff  --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html
index 2c98f86c09a984..196a318c2b0b0d 100755
--- a/clang/www/cxx_status.html
+++ b/clang/www/cxx_status.html
@@ -115,7 +115,12 @@ <h2 id="cxx26">C++2c implementation status</h2>
  <tr>
   <td>Unevaluated strings</td>
   <td><a href="https://wg21.link/P2361R6">P2361R6</a></td>
-  <td class="none" align="center">No</td>
+  <td class="partial" align="center">
+    <details>
+      <summary>Clang 17 (Partial)</summary>
+      Attributes arguments don't yet parse as unevaluated string literals.
+    </details>
+  </td>
  </tr>
  <tr>
   <td>Add @, $, and ` to the basic character set</td>


        


More information about the cfe-commits mailing list