r336097 - [ms] Fix mangling of char16_t and char32_t to be compatible with MSVC.

Nico Weber via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 2 05:31:20 PDT 2018


Author: nico
Date: Mon Jul  2 05:31:20 2018
New Revision: 336097

URL: http://llvm.org/viewvc/llvm-project?rev=336097&view=rev
Log:
[ms] Fix mangling of char16_t and char32_t to be compatible with MSVC.

MSVC limits char16_t and char32_t string literal names to 32 bytes of character
data, not to 32 characters. wchar_t string literal names on the other hand can
get up to 64 bytes of character data.

https://reviews.llvm.org/D48781

Modified:
    cfe/trunk/lib/AST/MicrosoftMangle.cpp
    cfe/trunk/test/CodeGenCXX/mangle-ms-string-literals.cpp

Modified: cfe/trunk/lib/AST/MicrosoftMangle.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/MicrosoftMangle.cpp?rev=336097&r1=336096&r2=336097&view=diff
==============================================================================
--- cfe/trunk/lib/AST/MicrosoftMangle.cpp (original)
+++ cfe/trunk/lib/AST/MicrosoftMangle.cpp Mon Jul  2 05:31:20 2018
@@ -3164,9 +3164,9 @@ MicrosoftMangleContextImpl::mangleDynami
 
 void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL,
                                                      raw_ostream &Out) {
-  // <char-type> ::= 0   # char
-  //             ::= 1   # wchar_t
-  //             ::= ??? # char16_t/char32_t will need a mangling too...
+  // <char-type> ::= 0   # char, char16_t, char32_t
+  //                     # (little endian char data in mangling)
+  //             ::= 1   # wchar_t (big endian char data in mangling)
   //
   // <literal-length> ::= <non-negative integer>  # the length of the literal
   //
@@ -3228,8 +3228,8 @@ void MicrosoftMangleContextImpl::mangleS
   // scheme.
   Mangler.mangleNumber(JC.getCRC());
 
-  // <encoded-string>: The mangled name also contains the first 32 _characters_
-  // (including null-terminator bytes) of the StringLiteral.
+  // <encoded-string>: The mangled name also contains the first 32 bytes
+  // (including null-terminator bytes) of the encoded StringLiteral.
   // Each character is encoded by splitting them into bytes and then encoding
   // the constituent bytes.
   auto MangleByte = [&Mangler](char Byte) {
@@ -3258,17 +3258,17 @@ void MicrosoftMangleContextImpl::mangleS
     }
   };
 
-  // Enforce our 32 character max.
-  unsigned NumCharsToMangle = std::min(32U, SL->getLength());
-  for (unsigned I = 0, E = NumCharsToMangle * SL->getCharByteWidth(); I != E;
-       ++I)
+  // Enforce our 32 bytes max, except wchar_t which gets 32 chars instead.
+  unsigned MaxBytesToMangle = SL->isWide() ? 64U : 32U;
+  unsigned NumBytesToMangle = std::min(MaxBytesToMangle, SL->getByteLength());
+  for (unsigned I = 0; I != NumBytesToMangle; ++I)
     if (SL->isWide())
       MangleByte(GetBigEndianByte(I));
     else
       MangleByte(GetLittleEndianByte(I));
 
   // Encode the NUL terminator if there is room.
-  if (NumCharsToMangle < 32)
+  if (NumBytesToMangle < MaxBytesToMangle)
     for (unsigned NullTerminator = 0; NullTerminator < SL->getCharByteWidth();
          ++NullTerminator)
       MangleByte(0);

Modified: cfe/trunk/test/CodeGenCXX/mangle-ms-string-literals.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/mangle-ms-string-literals.cpp?rev=336097&r1=336096&r2=336097&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/mangle-ms-string-literals.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/mangle-ms-string-literals.cpp Mon Jul  2 05:31:20 2018
@@ -719,9 +719,35 @@ const wchar_t *LongWideString = L"012345
 // CHECK: @"??_C@_1EK@KFPEBLPK@?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AAA?$AAB@"
 const wchar_t *UnicodeLiteral = L"\ud7ff";
 // CHECK: @"??_C@_13IIHIAFKH@?W?$PP?$AA?$AA@"
+
 const char *U8Literal = u8"hi";
 // CHECK: @"??_C@_02PCEFGMJL@hi?$AA@"
+const char *LongU8Literal = u8"012345678901234567890123456789ABCDEF";
+// CHECK: @"??_C@_0CF@LABBIIMO@012345678901234567890123456789AB@"
+
 const char16_t *U16Literal = u"hi";
 // CHECK: @"??_C@_05OMLEGLOC@h?$AAi?$AA?$AA?$AA@"
+// Note this starts with o instead of 0. Else LongWideString would have
+// the same initializer and CodeGenModule::ConstantStringMap would map them
+// to the same global with a shared mangling.
+// FIXME: ConstantStringMap probably shouldn't map things with the same data
+// but different manglings to the same variable.
+const char16_t *LongU16Literal = u"o12345678901234567890123456789ABCDEF";
+// CHECK: @"??_C@_0EK@FEAOBHPP@o?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA@"
+
 const char32_t *U32Literal = U"hi";
 // CHECK: @"??_C@_0M@GFNAJIPG@h?$AA?$AA?$AAi?$AA?$AA?$AA?$AA?$AA?$AA?$AA@"
+const char32_t *LongU32Literal = U"012345678901234567890123456789ABCDEF";
+// CHECK: @"??_C@_0JE@IMHFEDAA@0?$AA?$AA?$AA1?$AA?$AA?$AA2?$AA?$AA?$AA3?$AA?$AA?$AA4?$AA?$AA?$AA5?$AA?$AA?$AA6?$AA?$AA?$AA7?$AA?$AA?$AA@"
+
+// These all have just the right length that the trailing 0 just fits.
+const char *MaxASCIIString = "012345678901234567890123456789A";
+// CHECK: @"??_C@_0CA@NMANGEKF@012345678901234567890123456789A?$AA@"
+const wchar_t *MaxWideString = L"012345678901234567890123456789A";
+// CHECK: @"??_C@_1EA@LJAFPILO@?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AAA?$AA?$AA@"
+const char *MaxU8String = u8"012345678901234567890123456789A";
+// CHECK: @"??_C@_0CA@NMANGEKF@012345678901234567890123456789A?$AA@"
+const char16_t *MaxU16String = u"012345678901234";
+// CHECK: @"??_C@_0CA@NFEFHIFO@0?$AA1?$AA2?$AA3?$AA4?$AA5?$AA6?$AA7?$AA8?$AA9?$AA0?$AA1?$AA2?$AA3?$AA4?$AA?$AA?$AA@"
+const char32_t *MaxU32String = U"0123456";
+// CHECK: @"??_C@_0CA@KFPHPCC@0?$AA?$AA?$AA1?$AA?$AA?$AA2?$AA?$AA?$AA3?$AA?$AA?$AA4?$AA?$AA?$AA5?$AA?$AA?$AA6?$AA?$AA?$AA?$AA?$AA?$AA?$AA@"




More information about the cfe-commits mailing list