[llvm-branch-commits] [clang] Convert to exec-charset inside getPredefinedStringLiteralFromCache (PR #196569)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri May 8 09:27:11 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Abhina Sree (abhina-sree)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/196569.diff
5 Files Affected:
- (modified) clang/include/clang/Basic/TargetInfo.h (+2)
- (modified) clang/lib/AST/ASTContext.cpp (+10)
- (modified) clang/lib/Basic/TargetInfo.cpp (+3)
- (modified) clang/lib/Lex/TextEncodingConfig.cpp (+5-4)
- (modified) clang/test/CodeGen/systemz-charset.cpp (+28)
``````````diff
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index ec7d4fcd4d8e3..6c0e65a85ee13 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -326,6 +326,8 @@ class TargetInfo : public TransferrableTargetInfo,
llvm::TextEncodingConverter *FormatStrConverter;
+ llvm::TextEncodingConverter *ExecStrConverter;
+
/// Retrieve the target options.
TargetOptions &getTargetOpts() const {
assert(TargetOpts && "Missing target options");
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index a0894318dbd53..80e073385ce82 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -13752,6 +13752,16 @@ ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const {
*this, Key, StringLiteralKind::Ordinary,
/*Pascal*/ false, getStringLiteralArrayType(CharTy, Key.size()),
SourceLocation());
+
+ llvm::TextEncodingConverter *Converter = getTargetInfo().ExecStrConverter;
+ if (Converter) {
+ SmallString<128> Converted;
+ Converter->convert(Result->getString(), Converted);
+ Result = StringLiteral::Create(
+ *this, Converted, StringLiteralKind::Ordinary, /*Pascal*/ false,
+ getStringLiteralArrayType(CharTy, Converted.size()), SourceLocation());
+ }
+
return Result;
}
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 43efca42886cc..0c553033ad069 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -197,6 +197,9 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
FormatStrConverter = new llvm::TextEncodingConverter(
std::move(*llvm::TextEncodingConverter::createNoopConverter()));
+
+ ExecStrConverter = new llvm::TextEncodingConverter(
+ std::move(*llvm::TextEncodingConverter::createNoopConverter()));
}
// Out of line virtual dtor for TargetInfo.
diff --git a/clang/lib/Lex/TextEncodingConfig.cpp b/clang/lib/Lex/TextEncodingConfig.cpp
index 427b75a1c0a8b..6df88e258ffde 100644
--- a/clang/lib/Lex/TextEncodingConfig.cpp
+++ b/clang/lib/Lex/TextEncodingConfig.cpp
@@ -37,14 +37,15 @@ TextEncodingConfig::setConvertersFromOptions(TextEncodingConfig &TEC,
return std::error_code();
ErrorOr<TextEncodingConverter> ErrorOrConverter =
llvm::TextEncodingConverter::create(UTF8, TEC.ExecEncoding);
- if (ErrorOrConverter)
+ if (ErrorOrConverter) {
TEC.ToExecEncodingConverter =
new TextEncodingConverter(std::move(*ErrorOrConverter));
- else
+ TInfo.ExecStrConverter = TEC.ToExecEncodingConverter;
+ } else
return ErrorOrConverter.getError();
- ErrorOrConverter = llvm::TextEncodingConverter::create(TEC.SystemEncoding,
- TEC.InternalEncoding);
+ ErrorOrConverter = llvm::TextEncodingConverter::create(
+ TInfo.getTriple().getDefaultNarrowTextEncoding(), UTF8);
if (ErrorOrConverter)
TInfo.FormatStrConverter =
diff --git a/clang/test/CodeGen/systemz-charset.cpp b/clang/test/CodeGen/systemz-charset.cpp
index f7becd5b39492..f8219ca00d20a 100644
--- a/clang/test/CodeGen/systemz-charset.cpp
+++ b/clang/test/CodeGen/systemz-charset.cpp
@@ -1,46 +1,74 @@
// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -std=c++17 -fexec-charset IBM-1047 -o - | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset UTF-8 -o - | FileCheck %s --check-prefix=CHECK-UTF8
const char *RawString = R"(Hello\n)";
//CHECK: c"\C8\85\93\93\96\E0\95\00"
+//CHECK-UTF8: c"Hello\\n\00"
const char *MultiLineRawString = R"(
Hello
There)";
//CHECK: c"\15\C8\85\93\93\96\15\E3\88\85\99\85\00"
+//CHECK-UTF8: c"\0AHello\0AThere\00"
char UnicodeChar8 = u8'1';
//CHECK: i8 49
+//CHECK-UTF8: i8 49
char16_t UnicodeChar16 = u'1';
//CHECK: i16 49
+//CHECK-UTF8: i16 49
char32_t UnicodeChar32 = U'1';
//CHECK: i32 49
+//CHECK-UTF8: i32 49
const char *EscapeCharacters8 = u8"\a\b\f\n\r\t\v\\\'\"\?";
//CHECK: c"\07\08\0C\0A\0D\09\0B\\'\22?\00"
+//CHECK-UTF8: c"\07\08\0C\0A\0D\09\0B\\'\22?\00"
const char16_t *EscapeCharacters16 = u"\a\b\f\n\r\t\v\\\'\"\?";
//CHECK: [12 x i16] [i16 7, i16 8, i16 12, i16 10, i16 13, i16 9, i16 11, i16 92, i16 39, i16 34, i16 63, i16 0]
+//CHECK-UTF8: [12 x i16] [i16 7, i16 8, i16 12, i16 10, i16 13, i16 9, i16 11, i16 92, i16 39, i16 34, i16 63, i16 0]
const char32_t *EscapeCharacters32 = U"\a\b\f\n\r\t\v\\\'\"\?";
//CHECK: [12 x i32] [i32 7, i32 8, i32 12, i32 10, i32 13, i32 9, i32 11, i32 92, i32 39, i32 34, i32 63, i32 0]
+//CHECK-UTF8: [12 x i32] [i32 7, i32 8, i32 12, i32 10, i32 13, i32 9, i32 11, i32 92, i32 39, i32 34, i32 63, i32 0]
const char *UnicodeString8 = u8"Hello";
//CHECK: c"Hello\00"
+//CHECK-UTF8: c"Hello\00"
+
const char16_t *UnicodeString16 = u"Hello";
//CHECK: [6 x i16] [i16 72, i16 101, i16 108, i16 108, i16 111, i16 0]
+//CHECK-UTF8: [6 x i16] [i16 72, i16 101, i16 108, i16 108, i16 111, i16 0]
+
const char32_t *UnicodeString32 = U"Hello";
//CHECK: [6 x i32] [i32 72, i32 101, i32 108, i32 108, i32 111, i32 0]
+//CHECK-UTF8: [6 x i32] [i32 72, i32 101, i32 108, i32 108, i32 111, i32 0]
const char *UnicodeRawString8 = u8R"("Hello\")";
//CHECK: c"\22Hello\\\22\00"
+//CHECK-UTF8: c"\22Hello\\\22\00"
+
const char16_t *UnicodeRawString16 = uR"("Hello\")";
//CHECK: [9 x i16] [i16 34, i16 72, i16 101, i16 108, i16 108, i16 111, i16 92, i16 34, i16 0]
+//CHECK-UTF8: [9 x i16] [i16 34, i16 72, i16 101, i16 108, i16 108, i16 111, i16 92, i16 34, i16 0]
+
const char32_t *UnicodeRawString32 = UR"("Hello\")";
//CHECK: [9 x i32] [i32 34, i32 72, i32 101, i32 108, i32 108, i32 111, i32 92, i32 34, i32 0]
+//CHECK-UTF8: [9 x i32] [i32 34, i32 72, i32 101, i32 108, i32 108, i32 111, i32 92, i32 34, i32 0]
const char *UnicodeUCNString8 = u8"\u00E2\u00AC\U000000DF";
//CHECK: c"\C3\A2\C2\AC\C3\9F\00"
+//CHECK-UTF8: c"\C3\A2\C2\AC\C3\9F\00"
+
const char16_t *UnicodeUCNString16 = u"\u00E2\u00AC\U000000DF";
//CHECK: [4 x i16] [i16 226, i16 172, i16 223, i16 0]
+//CHECK-UTF8: [4 x i16] [i16 226, i16 172, i16 223, i16 0]
+
const char32_t *UnicodeUCNString32 = U"\u00E2\u00AC\U000000DF";
//CHECK: [4 x i32] [i32 226, i32 172, i32 223, i32 0]
+//CHECK-UTF8: [4 x i32] [i32 226, i32 172, i32 223, i32 0]
+
+const char *file = __builtin_FILE();
+//CHECK: {{.*}}\A2\A8\A2\A3\85\94\A9`\83\88\81\99\A2\85\A3K\83\97\97\00"
+//CHECK-UTF8: {{.*}}systemz-charset.cpp\00"
``````````
</details>
https://github.com/llvm/llvm-project/pull/196569
More information about the llvm-branch-commits
mailing list