r176779 - Handle _Pragma on a u8, u, or U string literal per the C11 specification. Also handle raw string literals here.

Richard Smith richard-llvm at metafoo.co.uk
Sat Mar 9 15:30:15 PST 2013


Author: rsmith
Date: Sat Mar  9 17:30:15 2013
New Revision: 176779

URL: http://llvm.org/viewvc/llvm-project?rev=176779&view=rev
Log:
Handle _Pragma on a u8, u, or U string literal per the C11 specification. Also
handle raw string literals here. C++11 doesn't yet specify how they will
behave, but discussion on core suggests that we should just strip off
everything but the r-char-sequence.

Modified:
    cfe/trunk/include/clang/Basic/TokenKinds.h
    cfe/trunk/include/clang/Parse/Parser.h
    cfe/trunk/lib/Lex/MacroArgs.cpp
    cfe/trunk/lib/Lex/Pragma.cpp
    cfe/trunk/test/Lexer/pragma-operators.cpp

Modified: cfe/trunk/include/clang/Basic/TokenKinds.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/TokenKinds.h?rev=176779&r1=176778&r2=176779&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/TokenKinds.h (original)
+++ cfe/trunk/include/clang/Basic/TokenKinds.h Sat Mar  9 17:30:15 2013
@@ -68,15 +68,21 @@ inline bool isAnyIdentifier(TokenKind K)
   return (K == tok::identifier) || (K == tok::raw_identifier);
 }
 
+/// \brief Return true if this is a C or C++ string-literal (or
+/// C++11 user-defined-string-literal) token.
+inline bool isStringLiteral(TokenKind K) {
+  return K == tok::string_literal || K == tok::wide_string_literal ||
+         K == tok::utf8_string_literal || K == tok::utf16_string_literal ||
+         K == tok::utf32_string_literal;
+}
+
 /// \brief Return true if this is a "literal" kind, like a numeric
 /// constant, string, etc.
 inline bool isLiteral(TokenKind K) {
-  return (K == tok::numeric_constant) || (K == tok::char_constant) ||
-         (K == tok::wide_char_constant) || (K == tok::utf16_char_constant) ||
-         (K == tok::utf32_char_constant) || (K == tok::string_literal) ||
-         (K == tok::wide_string_literal) || (K == tok::utf8_string_literal) ||
-         (K == tok::utf16_string_literal) || (K == tok::utf32_string_literal) ||
-         (K == tok::angle_string_literal);
+  return K == tok::numeric_constant || K == tok::char_constant ||
+         K == tok::wide_char_constant || K == tok::utf16_char_constant ||
+         K == tok::utf32_char_constant || isStringLiteral(K) ||
+         K == tok::angle_string_literal;
 }
 
 /// \brief Return true if this is any of tok::annot_* kinds.

Modified: cfe/trunk/include/clang/Parse/Parser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Parse/Parser.h?rev=176779&r1=176778&r2=176779&view=diff
==============================================================================
--- cfe/trunk/include/clang/Parse/Parser.h (original)
+++ cfe/trunk/include/clang/Parse/Parser.h Sat Mar  9 17:30:15 2013
@@ -279,11 +279,7 @@ private:
   /// isTokenStringLiteral - True if this token is a string-literal.
   ///
   bool isTokenStringLiteral() const {
-    return Tok.getKind() == tok::string_literal ||
-           Tok.getKind() == tok::wide_string_literal ||
-           Tok.getKind() == tok::utf8_string_literal ||
-           Tok.getKind() == tok::utf16_string_literal ||
-           Tok.getKind() == tok::utf32_string_literal;
+    return tok::isStringLiteral(Tok.getKind());
   }
 
   /// \brief Returns true if the current token is '=' or is a type of '='.

Modified: cfe/trunk/lib/Lex/MacroArgs.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/MacroArgs.cpp?rev=176779&r1=176778&r2=176779&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/MacroArgs.cpp (original)
+++ cfe/trunk/lib/Lex/MacroArgs.cpp Sat Mar  9 17:30:15 2013
@@ -215,15 +215,11 @@ Token MacroArgs::StringifyArgument(const
 
     // If this is a string or character constant, escape the token as specified
     // by 6.10.3.2p2.
-    if (Tok.is(tok::string_literal) ||       // "foo"
-        Tok.is(tok::wide_string_literal) ||  // L"foo"
-        Tok.is(tok::utf8_string_literal) ||  // u8"foo"
-        Tok.is(tok::utf16_string_literal) || // u"foo"
-        Tok.is(tok::utf32_string_literal) || // U"foo"
-        Tok.is(tok::char_constant) ||        // 'x'
-        Tok.is(tok::wide_char_constant) ||   // L'x'.
-        Tok.is(tok::utf16_char_constant) ||  // u'x'.
-        Tok.is(tok::utf32_char_constant)) {  // U'x'.
+    if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
+        Tok.is(tok::char_constant) ||          // 'x'
+        Tok.is(tok::wide_char_constant) ||     // L'x'.
+        Tok.is(tok::utf16_char_constant) ||    // u'x'.
+        Tok.is(tok::utf32_char_constant)) {    // U'x'.
       bool Invalid = false;
       std::string TokStr = PP.getSpelling(Tok, &Invalid);
       if (!Invalid) {

Modified: cfe/trunk/lib/Lex/Pragma.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Pragma.cpp?rev=176779&r1=176778&r2=176779&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Pragma.cpp (original)
+++ cfe/trunk/lib/Lex/Pragma.cpp Sat Mar  9 17:30:15 2013
@@ -184,7 +184,7 @@ void Preprocessor::Handle_Pragma(Token &
 
   // Read the '"..."'.
   Lex(Tok);
-  if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) {
+  if (!tok::isStringLiteral(Tok.getKind())) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
     // Skip this token, and the ')', if present.
     if (Tok.isNot(tok::r_paren))
@@ -219,15 +219,50 @@ void Preprocessor::Handle_Pragma(Token &
   SourceLocation RParenLoc = Tok.getLocation();
   std::string StrVal = getSpelling(StrTok);
 
-  // The _Pragma is lexically sound.  Destringize according to C99 6.10.9.1:
-  // "The string literal is destringized by deleting the L prefix, if present,
+  // The _Pragma is lexically sound.  Destringize according to C11 6.10.9.1:
+  // "The string literal is destringized by deleting any encoding prefix,
   // deleting the leading and trailing double-quotes, replacing each escape
   // sequence \" by a double-quote, and replacing each escape sequence \\ by a
   // single backslash."
-  if (StrVal[0] == 'L')  // Remove L prefix.
+  if (StrVal[0] == 'L' || StrVal[0] == 'U' ||
+      (StrVal[0] == 'u' && StrVal[1] != '8'))
     StrVal.erase(StrVal.begin());
-  assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
-         "Invalid string token!");
+  else if (StrVal[0] == 'u')
+    StrVal.erase(StrVal.begin(), StrVal.begin() + 2);
+
+  if (StrVal[0] == 'R') {
+    // FIXME: C++11 does not specify how to handle raw-string-literals here.
+    // We strip off the 'R', the quotes, the d-char-sequences, and the parens.
+    assert(StrVal[1] == '"' && StrVal[StrVal.size() - 1] == '"' &&
+           "Invalid raw string token!");
+
+    // Measure the length of the d-char-sequence.
+    unsigned NumDChars = 0;
+    while (StrVal[2 + NumDChars] != '(') {
+      assert(NumDChars < (StrVal.size() - 5) / 2 &&
+             "Invalid raw string token!");
+      ++NumDChars;
+    }
+    assert(StrVal[StrVal.size() - 2 - NumDChars] == ')');
+
+    // Remove 'R " d-char-sequence' and 'd-char-sequence "'. We'll replace the
+    // parens below.
+    StrVal.erase(0, 2 + NumDChars);
+    StrVal.erase(StrVal.size() - 1 - NumDChars);
+  } else {
+    assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
+           "Invalid string token!");
+
+    // Remove escaped quotes and escapes.
+    for (unsigned i = 1, e = StrVal.size(); i < e-2; ++i) {
+      if (StrVal[i] == '\\' &&
+          (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) {
+        // \\ -> '\' and \" -> '"'.
+        StrVal.erase(StrVal.begin()+i);
+        --e;
+      }
+    }
+  }
 
   // Remove the front quote, replacing it with a space, so that the pragma
   // contents appear to have a space before them.
@@ -236,16 +271,6 @@ void Preprocessor::Handle_Pragma(Token &
   // Replace the terminating quote with a \n.
   StrVal[StrVal.size()-1] = '\n';
 
-  // Remove escaped quotes and escapes.
-  for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) {
-    if (StrVal[i] == '\\' &&
-        (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) {
-      // \\ -> '\' and \" -> '"'.
-      StrVal.erase(StrVal.begin()+i);
-      --e;
-    }
-  }
-  
   // Plop the string (including the newline and trailing null) into a buffer
   // where we can lex it.
   Token TmpTok;

Modified: cfe/trunk/test/Lexer/pragma-operators.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/pragma-operators.cpp?rev=176779&r1=176778&r2=176779&view=diff
==============================================================================
--- cfe/trunk/test/Lexer/pragma-operators.cpp (original)
+++ cfe/trunk/test/Lexer/pragma-operators.cpp Sat Mar  9 17:30:15 2013
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fms-extensions -E %s | FileCheck %s
+// RUN: %clang_cc1 -fms-extensions -std=c++11 -E %s | FileCheck %s
 
 // Test that we properly expand the C99 _Pragma and Microsoft __pragma
 // into #pragma directives, with newlines where needed. <rdar://problem/8412013>
@@ -17,3 +17,21 @@
 #pragma warning(push)
 B(foo)
 #pragma warning(pop)
+
+#define pragma_L _Pragma(L"GCC diagnostic push")
+#define pragma_u8 _Pragma(u8"system_header")
+#define pragma_u _Pragma(u"GCC diagnostic pop")
+#define pragma_U _Pragma(U"comment(lib, \"libfoo\")")
+#define pragma_R _Pragma(R"(clang diagnostic ignored "-Wunused")")
+#define pragma_UR _Pragma(UR"(clang diagnostic error "-Wunused")")
+#define pragma_hello _Pragma(u8R"x(message R"y("Hello", world!)y")x")
+// CHECK: int n =
+// CHECK: #pragma GCC diagnostic push
+// CHECK: #pragma system_header
+// CHECK: #pragma GCC diagnostic pop
+// CHECK: #pragma comment(lib, "libfoo")
+// CHECK: #pragma clang diagnostic ignored "-Wunused"
+// CHECK: #pragma clang diagnostic error "-Wunused"
+// CHECK: #pragma message("\042Hello\042, world!")
+// CHECK: 0;
+int n = pragma_L pragma_u8 pragma_u pragma_U pragma_R pragma_UR pragma_hello 0;





More information about the cfe-commits mailing list