[Patch] Add the __wchar_t type in MS-compatibility mode (PR15815)

Aaron Ballman aaron at aaronballman.com
Wed May 8 13:59:08 PDT 2013


diff --git a/include/clang/AST/ASTContext.h b/include/clang/AST/ASTContext.h
index 902ded5..302a3a2 100644
--- a/include/clang/AST/ASTContext.h
+++ b/include/clang/AST/ASTContext.h
@@ -740,10 +740,11 @@ public:
   CanQualType VoidTy;
   CanQualType BoolTy;
   CanQualType CharTy;
-  CanQualType WCharTy;  // [C++ 3.9.1p5], integer type in C99.
-  CanQualType WIntTy;   // [C99 7.24.1], integer type unchanged by default promotions.
-  CanQualType Char16Ty; // [C++0x 3.9.1p5], integer type in C99.
-  CanQualType Char32Ty; // [C++0x 3.9.1p5], integer type in C99.
+  CanQualType WCharTy;   // [C++ 3.9.1p5], integer type in C99.
+  CanQualType MSWCharTy; // __wchar_t, Microsoft extension.
+  CanQualType WIntTy;    // [C99 7.24.1], integer type unchanged by default promotions.
+  CanQualType Char16Ty;  // [C++0x 3.9.1p5], integer type in C99.
+  CanQualType Char32Ty;  // [C++0x 3.9.1p5], integer type in C99.

It seems this changes more than just the additional line you added
(perhaps a line endings thing?).


   CanQualType SignedCharTy, ShortTy, IntTy, LongTy, LongLongTy, Int128Ty;
   CanQualType UnsignedCharTy, UnsignedShortTy, UnsignedIntTy, UnsignedLongTy;
   CanQualType UnsignedLongLongTy, UnsignedInt128Ty;
@@ -1135,6 +1136,11 @@ public:
   /// by the target.
   QualType getWCharType() const { return WCharTy; }

+  /// \brief Returns the __wchar_t type, a Microsoft extension. In C++, this is
+  /// the same as wchar_t. In C99, this is the same as wchar_t would be if it
+  /// had been available as a built-in type.
+  QualType getMSWCharType() const { return MSWCharTy; }
+
   /// \brief Return the type of "signed wchar_t".
   ///
   /// Used when in C++, as a GCC extension.
diff --git a/include/clang/AST/PrettyPrinter.h b/include/clang/AST/PrettyPrinter.h
index e3c09e7..adfd1ac 100644
--- a/include/clang/AST/PrettyPrinter.h
+++ b/include/clang/AST/PrettyPrinter.h
@@ -40,7 +40,8 @@ struct PrintingPolicy {
       SuppressUnwrittenScope(false), SuppressInitializers(false),
       ConstantArraySizeAsWritten(false), AnonymousTagLocations(true),
       SuppressStrongLifetime(false), Bool(LO.Bool),
-      TerseOutput(false), PolishForDeclaration(false) { }
+      TerseOutput(false), PolishForDeclaration(false),
+      MSWChar(LO.MicrosoftExt && !LO.WChar) { }

   /// \brief What language we're printing.
   LangOptions LangOpts;
@@ -146,6 +147,10 @@ struct PrintingPolicy {
   /// declaration tag; such as, do not print attributes attached to the declaration.
   ///
   unsigned PolishForDeclaration : 1;
+
+  /// \brief When true, print the built-in wchar_t type as __wchar_t. For use in
+  /// Microsoft mode when wchar_t is not available.
+  unsigned MSWChar : 1;
 };

 } // end namespace clang
diff --git a/include/clang/Basic/Specifiers.h b/include/clang/Basic/Specifiers.h
index eb3fc65..781a19e 100644
--- a/include/clang/Basic/Specifiers.h
+++ b/include/clang/Basic/Specifiers.h
@@ -38,6 +38,7 @@ namespace clang {
     TST_void,
     TST_char,
     TST_wchar,        // C++ wchar_t
+    TST___wchar,      // Microsoft extension __wchar_t
     TST_char16,       // C++11 char16_t
     TST_char32,       // C++11 char32_t
     TST_int,
diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def
index bcf0f31..024e8fd 100644
--- a/include/clang/Basic/TokenKinds.def
+++ b/include/clang/Basic/TokenKinds.def
@@ -299,6 +299,7 @@ KEYWORD(typeid                      , KEYCXX)
 KEYWORD(using                       , KEYCXX)
 KEYWORD(virtual                     , KEYCXX)
 KEYWORD(wchar_t                     , WCHARSUPPORT)
+KEYWORD(__wchar_t                   , KEYMS)

 // C++ 2.5p2: Alternative Representations.
 CXX_KEYWORD_OPERATOR(and     , ampamp)
diff --git a/include/clang/Sema/DeclSpec.h b/include/clang/Sema/DeclSpec.h
index 059919a..0b0c622 100644
--- a/include/clang/Sema/DeclSpec.h
+++ b/include/clang/Sema/DeclSpec.h
@@ -260,6 +260,7 @@ public:
   static const TST TST_void = clang::TST_void;
   static const TST TST_char = clang::TST_char;
   static const TST TST_wchar = clang::TST_wchar;
+  static const TST TST___wchar = clang::TST___wchar;
   static const TST TST_char16 = clang::TST_char16;
   static const TST TST_char32 = clang::TST_char32;
   static const TST TST_int = clang::TST_int;
diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp
index e6dfd44..335a881 100644
--- a/lib/AST/ASTContext.cpp
+++ b/lib/AST/ASTContext.cpp
@@ -902,8 +902,14 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target) {
       InitBuiltinType(WCharTy,           BuiltinType::WChar_S);
     else  // -fshort-wchar makes wchar_t be unsigned.
       InitBuiltinType(WCharTy,           BuiltinType::WChar_U);
-  } else // C99 (or C++ using -fno-wchar)
+    MSWCharTy = WCharTy;
+  } else { // C99 (or C++ using -fno-wchar)
     WCharTy = getFromTargetType(Target.getWCharType());
+    if (TargetInfo::isTypeSigned(Target.getWCharType()))
+      InitBuiltinType(MSWCharTy,         BuiltinType::WChar_S);
+    else
+      InitBuiltinType(MSWCharTy,         BuiltinType::WChar_U);
+  }

   WIntTy = getFromTargetType(Target.getWIntType());

diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp
index fa16fac..a1f0b08 100644
--- a/lib/AST/Type.cpp
+++ b/lib/AST/Type.cpp
@@ -1521,7 +1521,7 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const {
   case Double:            return "double";
   case LongDouble:        return "long double";
   case WChar_S:
-  case WChar_U:           return "wchar_t";
+  case WChar_U:           return Policy.MSWChar ? "__wchar_t" : "wchar_t";
   case Char16:            return "char16_t";
   case Char32:            return "char32_t";
   case NullPtr:           return "nullptr_t";
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 17abb01..418f17c 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -748,6 +748,7 @@ private:
     case tok::kw_float:
     case tok::kw_double:
     case tok::kw_wchar_t:
+    case tok::kw___wchar_t:
     case tok::kw_bool:
     case tok::kw___underlying_type:
       return true;
diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp
index 6a87b78..ff97388 100644
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp
@@ -227,6 +227,7 @@ void Parser::ParseGNUAttributeArgs(IdentifierInfo *AttrName,
   switch (Tok.getKind()) {
   case tok::kw_char:
   case tok::kw_wchar_t:
+  case tok::kw___wchar_t:
   case tok::kw_char16_t:
   case tok::kw_char32_t:
   case tok::kw_bool:
@@ -2898,6 +2899,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
       isInvalid = DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec,
                                      DiagID);
       break;
+    case tok::kw___wchar_t:
+      isInvalid = DS.SetTypeSpecType(DeclSpec::TST___wchar, Loc, PrevSpec,
+                                     DiagID);
+      break;
     case tok::kw_char16_t:
       isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec,
                                      DiagID);
@@ -3827,6 +3832,7 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const {
   case tok::kw_void:
   case tok::kw_char:
   case tok::kw_wchar_t:
+  case tok::kw___wchar_t:
   case tok::kw_char16_t:
   case tok::kw_char32_t:
   case tok::kw_int:
@@ -3909,6 +3915,7 @@ bool Parser::isTypeSpecifierQualifier() {
   case tok::kw_void:
   case tok::kw_char:
   case tok::kw_wchar_t:
+  case tok::kw___wchar_t:
   case tok::kw_char16_t:
   case tok::kw_char32_t:
   case tok::kw_int:
@@ -4064,6 +4071,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
   case tok::kw_void:
   case tok::kw_char:
   case tok::kw_wchar_t:
+  case tok::kw___wchar_t:
   case tok::kw_char16_t:
   case tok::kw_char32_t:

diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index 9521ffb..b7eb99b 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp
@@ -1007,6 +1007,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
   case tok::annot_decltype:
   case tok::kw_char:
   case tok::kw_wchar_t:
+  case tok::kw___wchar_t:
   case tok::kw_char16_t:
   case tok::kw_char32_t:
   case tok::kw_bool:
diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp
index f259d5f..6bb92f4 100644
--- a/lib/Parse/ParseExprCXX.cpp
+++ b/lib/Parse/ParseExprCXX.cpp
@@ -1568,6 +1568,9 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) {
   case tok::kw_wchar_t:
     DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec, DiagID);
     break;
+  case tok::kw___wchar_t:
+    DS.SetTypeSpecType(DeclSpec::TST___wchar, Loc, PrevSpec, DiagID);
+    break;
   case tok::kw_char16_t:
     DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec, DiagID);
     break;
diff --git a/lib/Parse/ParseObjc.cpp b/lib/Parse/ParseObjc.cpp
index 4a572f1..a94e1a2 100644
--- a/lib/Parse/ParseObjc.cpp
+++ b/lib/Parse/ParseObjc.cpp
@@ -780,6 +780,7 @@ IdentifierInfo *Parser::ParseObjCSelectorPiece(SourceLocation &SelectorLoc) {
   case tok::kw_void:
   case tok::kw_volatile:
   case tok::kw_wchar_t:
+  case tok::kw___wchar_t:
   case tok::kw_while:
   case tok::kw__Bool:
   case tok::kw__Complex:
diff --git a/lib/Parse/ParseTentative.cpp b/lib/Parse/ParseTentative.cpp
index dff3b64..f8aa395 100644
--- a/lib/Parse/ParseTentative.cpp
+++ b/lib/Parse/ParseTentative.cpp
@@ -834,6 +834,7 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) {
   case tok::kw_class:
   case tok::kw_typename:
   case tok::kw_wchar_t:
+  case tok::kw___wchar_t:
   case tok::kw_char16_t:
   case tok::kw_char32_t:
   case tok::kw___underlying_type:
@@ -1253,6 +1254,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,

   case tok::kw_char:
   case tok::kw_wchar_t:
+  case tok::kw___wchar_t:
   case tok::kw_char16_t:
   case tok::kw_char32_t:
   case tok::kw_bool:
diff --git a/lib/Sema/DeclSpec.cpp b/lib/Sema/DeclSpec.cpp
index 3b3ab2c..5066f4a 100644
--- a/lib/Sema/DeclSpec.cpp
+++ b/lib/Sema/DeclSpec.cpp
@@ -284,6 +284,7 @@ bool Declarator::isDeclarationOfFunction() const {
     case TST_unspecified:
     case TST_void:
     case TST_wchar:
+    case TST___wchar:
     case TST_image1d_t:
     case TST_image1d_array_t:
     case TST_image1d_buffer_t:
@@ -418,6 +419,7 @@ const char *DeclSpec::getSpecifierName(DeclSpec::TST T) {
   case DeclSpec::TST_void:        return "void";
   case DeclSpec::TST_char:        return "char";
   case DeclSpec::TST_wchar:       return "wchar_t";
+  case DeclSpec::TST___wchar:     return "__wchar_t";
   case DeclSpec::TST_char16:      return "char16_t";
   case DeclSpec::TST_char32:      return "char32_t";
   case DeclSpec::TST_int:         return "int";
diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp
index e0e8bd6..c08aa66 100644
--- a/lib/Sema/SemaDecl.cpp
+++ b/lib/Sema/SemaDecl.cpp
@@ -100,6 +100,7 @@ bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const {
   case tok::kw_float:
   case tok::kw_double:
   case tok::kw_wchar_t:
+  case tok::kw___wchar_t:
   case tok::kw_bool:
   case tok::kw___underlying_type:
     return true;
diff --git a/lib/Sema/SemaInit.cpp b/lib/Sema/SemaInit.cpp
index 993e68b..8dfdeff 100644
--- a/lib/Sema/SemaInit.cpp
+++ b/lib/Sema/SemaInit.cpp
@@ -4304,7 +4304,8 @@ InitializationSequence::InitializationSequence(Sema &S,
       TryListInitialization(S, Entity, Kind, cast<InitListExpr>(Initializer),
                             *this);
       AddParenthesizedArrayInitStep(DestType);
-    } else if (DestAT->getElementType()->isAnyCharacterType())
+    } else if (DestAT->getElementType()->isAnyCharacterType() &&
+               !isa<StringLiteral>(Initializer->IgnoreParens()))
       SetFailed(FK_ArrayNeedsInitListOrStringLiteral);
     else
       SetFailed(FK_ArrayNeedsInitList);
diff --git a/lib/Sema/SemaTemplateVariadic.cpp b/lib/Sema/SemaTemplateVariadic.cpp
index db885ae..6d1180f 100644
--- a/lib/Sema/SemaTemplateVariadic.cpp
+++ b/lib/Sema/SemaTemplateVariadic.cpp
@@ -710,6 +710,7 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) {
   case TST_void:
   case TST_char:
   case TST_wchar:
+  case TST___wchar:
   case TST_char16:
   case TST_char32:
   case TST_int:
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp
index 0959f7d..1791dd2 100644
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -743,6 +743,11 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
       Result = Context.getUnsignedWCharType();
     }
     break;
+  case DeclSpec::TST___wchar:
+    assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified &&
+      "Unknown TSS value");
+    Result = Context.getMSWCharType();
+    break;
   case DeclSpec::TST_char16:
       assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified &&
         "Unknown TSS value");
diff --git a/test/Sema/ms-wchar.c b/test/Sema/ms-wchar.c
new file mode 100644
index 0000000..b09bacb
--- /dev/null
+++ b/test/Sema/ms-wchar.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -fms-extensions -triple i386-pc-win32 %s
+
+// C++ mode with -fno-wchar works the same as C mode for wchar_t.
+// RUN: %clang_cc1 -x c++ -fno-wchar -fsyntax-only -verify -fms-extensions -triple i386-pc-win32 %s
+
+wchar_t f(); // expected-error{{unknown type name 'wchar_t'}}
+
+// __wchar_t is available as an MS extension.
+__wchar_t g = L'a'; // expected-note {{previous}}
+
+// __wchar_t is a distinct type, separate from the target's integer type for wide chars.
+unsigned short g; // expected-error {{redefinition of 'g' with a different type: 'unsigned short' vs '__wchar_t'}}
+
+signed __wchar_t x; // expected-error {{'__wchar_t' cannot be signed or unsigned}}
+unsigned __wchar_t y; // expected-error {{'__wchar_t' cannot be signed or unsigned}}
+
+// The type of a wide string literal is actually not __wchar_t.
+__wchar_t s[] = L"Hello world!"; // expected-error {{array initializer must be an initializer list}}
+
+__wchar_t c = L'h'; // No error.
diff --git a/test/SemaCXX/ms-wchar.cpp b/test/SemaCXX/ms-wchar.cpp
new file mode 100644
index 0000000..b0a13a0
--- /dev/null
+++ b/test/SemaCXX/ms-wchar.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -fms-extensions -triple i386-pc-win32 %s
+
+wchar_t f();
+__wchar_t f(); // No error, wchar_t and __wchar_t are the same type.
+
+__wchar_t g = L'a';
+__wchar_t s[] = L"Hello world!";
+
+signed __wchar_t x; // expected-error {{'__wchar_t' cannot be signed or unsigned}}
+unsigned __wchar_t y; // expected-error {{'__wchar_t' cannot be signed or unsigned}}

The rest LGTM, but I would wait for further confirmation before committing.

Thanks!

~Aaron

On Wed, May 8, 2013 at 12:20 PM, Hans Wennborg <hans at chromium.org> wrote:
> Ping?
>
> On Mon, May 6, 2013 at 4:02 PM, Hans Wennborg <hans at chromium.org> wrote:
>> Hello again,
>>
>> As it turned out, that commit broke test/Sema/wchar.c on Windows, and
>> was reverted.
>>
>> The line that failed was this:
>>
>>   unsigned short s[] = L"something";
>>
>> The problem is that my patch was causing the type of the string
>> literal to change to __wchar_t on Windows, and since that is a
>> different type than unsigned short, the initialization doesn't work.
>>
>> I experimented some more with MSVC and learned that the code above
>> does compile in C mode, but not in C++ mode. The following does not
>> compile in C mode:
>>
>>   __wchar_t s[] = L"something";
>>
>> Which I found surprising.
>>
>> I think the semantics are like this:
>>
>> In C++, we have the wchar_t built-in type, and that's the type used
>> for wide string literals. __wchar_t is the same as wchar_t.
>>
>> In C, __wchar_t is the same as the built-in wchar_t would have been if
>> it were available. The type of wide string literals is array of
>> unsigned short.
>>
>> I'm attaching a new patch that implements this behavior. Please take a
>> look, and sorry again for the breakage.
>>
>> Thanks,
>> Hans
>>
>> On Fri, May 3, 2013 at 10:16 AM, Hans Wennborg <hans at chromium.org> wrote:
>>> Thanks! Committed r181004.
>>>
>>> On Thu, May 2, 2013 at 6:55 PM, Aaron Ballman <aaron at aaronballman.com> wrote:
>>>> Patch LGTM!
>>>>
>>>> ~Aaron
>>>>
>>>> On Thu, May 2, 2013 at 1:38 PM, Hans Wennborg <hans at chromium.org> wrote:
>>>>> On Tue, Apr 23, 2013 at 4:46 PM, Hans Wennborg <hans at chromium.org> wrote:
>>>>>> On Tue, Apr 23, 2013 at 2:48 PM, Richard Smith <richard at metafoo.co.uk> wrote:
>>>>>>> Does the setup code in ASTContext::InitBuiltinTypes do the right thing here?
>>>>>>
>>>>>> Hmm, turns out it didn't.
>>>>>>
>>>>>> I guess it's not obvious what the right thing is here. From
>>>>>> experimenting a bit, it seems that __wchar_t is always available, and
>>>>>> is always a distinct builtin type in visual studio, even in C.
>>>>>>
>>>>>> New patch attached.
>>>>>
>>>>> Richard pointed out on IRC that we shouldn't change semantics in
>>>>> -fms-extensions.
>>>>>
>>>>> I'm attaching a new patch. In -fms-extensions, __wchar_t is the same
>>>>> as built-in wchar_t if available, otherwise it is the same as the
>>>>> appropriate integer type.
>>>>>
>>>>> In -fms-compatibility we try to mimic MSVC exactly: there is always a
>>>>> __wchar_t type, and it is always separate from the regular integer
>>>>> types.
>>>>>
>>>>> There are a number of parameters here: C vs. C++, -fms-extensions vs.
>>>>> -fms-compatibility, and -fno-wchar. The patch covers all of them and I
>>>>> think the tests make it reasonably clear. If we think this is too
>>>>> complicated, we could just use only the -fms-extensions part of the
>>>>> patch.
>>>>>
>>>>> New patch attached, please take a look.
>>>>>
>>>>> Thanks,
>>>>> Hans



More information about the cfe-commits mailing list