[PATCH] D100346: [Clang] String Literal and Wide String Literal Encoding from the Preprocessor

ThePhD via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 12 20:22:00 PDT 2021


ThePhD updated this revision to Diff 337035.
ThePhD marked an inline comment as done.
Herald added subscribers: aheejin, dschuff.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100346/new/

https://reviews.llvm.org/D100346

Files:
  clang/docs/LanguageExtensions.rst
  clang/lib/Frontend/InitPreprocessor.cpp
  clang/test/Preprocessor/init-x86.c
  clang/test/Preprocessor/init.c


Index: clang/test/Preprocessor/init.c
===================================================================
--- clang/test/Preprocessor/init.c
+++ clang/test/Preprocessor/init.c
@@ -115,10 +115,12 @@
 // COMMON:#define __STDC__ 1
 // COMMON:#define __VERSION__ {{.*}}
 // COMMON:#define __clang__ 1
+// COMMON:#define __clang_literal_encoding__ {{.*}}
 // COMMON:#define __clang_major__ {{[0-9]+}}
 // COMMON:#define __clang_minor__ {{[0-9]+}}
 // COMMON:#define __clang_patchlevel__ {{[0-9]+}}
 // COMMON:#define __clang_version__ {{.*}}
+// COMMON:#define __clang_wide_literal_encoding__ {{.*}}
 // COMMON:#define __llvm__ 1
 //
 // RUN: %clang_cc1 -E -dM -triple=x86_64-pc-win32 < /dev/null | FileCheck -match-full-lines -check-prefix C-DEFAULT %s
@@ -1844,10 +1846,12 @@
 // WEBASSEMBLY-NOT:#define __WINT_UNSIGNED__
 // WEBASSEMBLY-NEXT:#define __WINT_WIDTH__ 32
 // WEBASSEMBLY-NEXT:#define __clang__ 1
+// WEBASSEMBLY-NEXT:#define __clang_literal_encoding__ {{.*}}
 // WEBASSEMBLY-NEXT:#define __clang_major__ {{.*}}
 // WEBASSEMBLY-NEXT:#define __clang_minor__ {{.*}}
 // WEBASSEMBLY-NEXT:#define __clang_patchlevel__ {{.*}}
 // WEBASSEMBLY-NEXT:#define __clang_version__ "{{.*}}"
+// WEBASSEMBLY-NEXT:#define __clang_wide_literal_encoding__ {{.*}}
 // WEBASSEMBLY-NEXT:#define __llvm__ 1
 // WEBASSEMBLY-NOT:#define __unix
 // WEBASSEMBLY-NOT:#define __unix__
Index: clang/test/Preprocessor/init-x86.c
===================================================================
--- clang/test/Preprocessor/init-x86.c
+++ clang/test/Preprocessor/init-x86.c
@@ -1306,10 +1306,12 @@
 // X86_64-CLOUDABI:#define __amd64 1
 // X86_64-CLOUDABI:#define __amd64__ 1
 // X86_64-CLOUDABI:#define __clang__ 1
+// X86_64-CLOUDABI:#define __clang_literal_encoding__ {{.*}}
 // X86_64-CLOUDABI:#define __clang_major__ {{.*}}
 // X86_64-CLOUDABI:#define __clang_minor__ {{.*}}
 // X86_64-CLOUDABI:#define __clang_patchlevel__ {{.*}}
 // X86_64-CLOUDABI:#define __clang_version__ {{.*}}
+// X86_64-CLOUDABI:#define __clang_wide_literal_encoding__ {{.*}}
 // X86_64-CLOUDABI:#define __llvm__ 1
 // X86_64-CLOUDABI:#define __x86_64 1
 // X86_64-CLOUDABI:#define __x86_64__ 1
Index: clang/lib/Frontend/InitPreprocessor.cpp
===================================================================
--- clang/lib/Frontend/InitPreprocessor.cpp
+++ clang/lib/Frontend/InitPreprocessor.cpp
@@ -778,6 +778,21 @@
     }
   }
 
+  // Macros to help identify the narrow and wide character sets.
+  // NOTE: clang currently ignores -fexec-charset=. If this changes,
+  // then this may need to be updated.
+  Builder.defineMacro("__clang_literal_encoding__", "\"UTF-8\"");
+  if (TI.getTypeWidth(TI.getWCharType()) >= 32) {
+    // NOTE: A wchar_t of at least 32 bits signals UTF-32. This may
+    // change if -fwide-exec-charset= is ever supported.
+    Builder.defineMacro("__clang_wide_literal_encoding__", "\"UTF-32\"");
+  } else {
+    // NOTE: A wchar_t narrower than 32 bits generally means UTF-16
+    // (e.g., Windows, 32-bit IBM). This may need to be
+    // updated if -fwide-exec-charset= is ever supported.
+    Builder.defineMacro("__clang_wide_literal_encoding__", "\"UTF-16\"");
+  }
+
   if (LangOpts.Optimize)
     Builder.defineMacro("__OPTIMIZE__");
   if (LangOpts.OptimizeSize)
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -383,6 +383,17 @@
   Defined to a string that captures the Clang marketing version, including the
   Subversion tag or revision number, e.g., "``1.5 (trunk 102332)``".
 
+``__clang_literal_encoding__``
+  Defined to a string that represents the current encoding of narrow string
+  literals, e.g., ``"hello"``. This is typically "UTF-8" (but may change in the
+  future if the ``-fexec-charset="Encoding-Name"`` option is implemented).
+
+``__clang_wide_literal_encoding__``
+  Defined to a string that represents the current encoding of wide string
+  literals, e.g., ``L"hello"``. This is typically "UTF-16" or "UTF-32"
+  (but may change in the future if the
+  ``-fwide-exec-charset="Encoding-Name"`` option is implemented).
+
 .. _langext-vectors:
 
 Vectors and Extended Vectors


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D100346.337035.patch
Type: text/x-patch
Size: 4210 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20210413/cb08cc7b/attachment.bin>


More information about the cfe-commits mailing list