[llvm-branch-commits] [clang] Convert to exec-charset inside getPredefinedStringLiteralFromCache (PR #196569)
Abhina Sree via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 26 06:14:27 PDT 2026
https://github.com/abhina-sree updated https://github.com/llvm/llvm-project/pull/196569
>From 6a7aca19d01edbdf4431a421eb36b78d16612199 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Fri, 8 May 2026 12:20:45 -0400
Subject: [PATCH 1/2] convert to exec-charset inside
getPredefinedStringLiteralFromCache, test __builtin_FILE()
---
clang/include/clang/Basic/TargetInfo.h | 2 ++
clang/lib/AST/ASTContext.cpp | 10 ++++++++++
clang/lib/Basic/TargetInfo.cpp | 3 +++
clang/lib/Lex/TextEncoding.cpp | 3 ++-
clang/test/CodeGen/systemz-charset.cpp | 4 ++++
5 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 73b055a76dda7..2e64f4ec4cb2e 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -326,6 +326,8 @@ class TargetInfo : public TransferrableTargetInfo,
llvm::TextEncodingConverter *FormatStrConverter;
+ llvm::TextEncodingConverter *ExecStrConverter;
+
/// Retrieve the target options.
TargetOptions &getTargetOpts() const {
assert(TargetOpts && "Missing target options");
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index abf0cd5e18c2b..b9dc80c3a16a4 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -13834,6 +13834,16 @@ ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const {
*this, Key, StringLiteralKind::Ordinary,
/*Pascal*/ false, getStringLiteralArrayType(CharTy, Key.size()),
SourceLocation());
+
+ llvm::TextEncodingConverter *Converter = getTargetInfo().ExecStrConverter;
+ if (Converter) {
+ SmallString<128> Converted;
+ Converter->convert(Result->getString(), Converted);
+ Result = StringLiteral::Create(
+ *this, Converted, StringLiteralKind::Ordinary, /*Pascal*/ false,
+ getStringLiteralArrayType(CharTy, Converted.size()), SourceLocation());
+ }
+
return Result;
}
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index ef239cec80ddb..c04c9081c4fec 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -197,6 +197,9 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
FormatStrConverter = new llvm::TextEncodingConverter(
std::move(*llvm::TextEncodingConverter::createNoopConverter()));
+
+ ExecStrConverter = new llvm::TextEncodingConverter(
+ std::move(*llvm::TextEncodingConverter::createNoopConverter()));
}
// Out of line virtual dtor for TargetInfo.
diff --git a/clang/lib/Lex/TextEncoding.cpp b/clang/lib/Lex/TextEncoding.cpp
index df3e4dbcaf4b4..a5d8e5e7da0b1 100644
--- a/clang/lib/Lex/TextEncoding.cpp
+++ b/clang/lib/Lex/TextEncoding.cpp
@@ -38,7 +38,8 @@ TextEncoding::setConvertersFromOptions(TextEncoding &TEC,
if (ErrorOrConverter)
TEC.ToLiteralEncodingConverter =
new TextEncodingConverter(std::move(*ErrorOrConverter));
- else
+ TInfo.ExecStrConverter = TEC.ToLiteralEncodingConverter;
+ } else
return ErrorOrConverter.getError();
ErrorOrConverter = llvm::TextEncodingConverter::create(
diff --git a/clang/test/CodeGen/systemz-charset.cpp b/clang/test/CodeGen/systemz-charset.cpp
index 59c4ad550cd94..34571a25f0e2c 100644
--- a/clang/test/CodeGen/systemz-charset.cpp
+++ b/clang/test/CodeGen/systemz-charset.cpp
@@ -72,3 +72,7 @@ const char16_t *UnicodeUCNString16 = u"\u00E2\u00AC\U000000DF";
const char32_t *UnicodeUCNString32 = U"\u00E2\u00AC\U000000DF";
//CHECK: [4 x i32] [i32 226, i32 172, i32 223, i32 0]
//CHECK=UTF8: [4 x i32] [i32 226, i32 172, i32 223, i32 0]
+
+const char *file = __builtin_FILE();
+//CHECK: {{.*}}\A2\A8\A2\A3\85\94\A9`\83\88\81\99\A2\85\A3K\83\97\97\00"
+//CHECK-UTF8: {{.*}}systemz-charset.cpp\00"
>From 7787a291f47463b6ae390f73c2dceb7e5f092051 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Fri, 22 May 2026 08:51:37 -0400
Subject: [PATCH 2/2] Convert the key before cache lookup to prevent encoding
differences
---
clang/lib/AST/ASTContext.cpp | 18 +++++++++---------
clang/lib/Lex/TextEncoding.cpp | 2 +-
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index b9dc80c3a16a4..fe35175425cbc 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -13828,6 +13828,15 @@ QualType ASTContext::getStringLiteralArrayType(QualType EltTy,
StringLiteral *
ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const {
+ // Apply encoding conversion to the key before cache lookup to ensure
+ // proper deduplication when the same source location is used multiple times.
+ SmallString<128> ConvertedKey;
+ llvm::TextEncodingConverter *Converter = getTargetInfo().ExecStrConverter;
+ if (Converter) {
+ Converter->convert(Key, ConvertedKey);
+ Key = ConvertedKey;
+ }
+
StringLiteral *&Result = StringLiteralCache[Key];
if (!Result)
Result = StringLiteral::Create(
@@ -13835,15 +13844,6 @@ ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const {
/*Pascal*/ false, getStringLiteralArrayType(CharTy, Key.size()),
SourceLocation());
- llvm::TextEncodingConverter *Converter = getTargetInfo().ExecStrConverter;
- if (Converter) {
- SmallString<128> Converted;
- Converter->convert(Result->getString(), Converted);
- Result = StringLiteral::Create(
- *this, Converted, StringLiteralKind::Ordinary, /*Pascal*/ false,
- getStringLiteralArrayType(CharTy, Converted.size()), SourceLocation());
- }
-
return Result;
}
diff --git a/clang/lib/Lex/TextEncoding.cpp b/clang/lib/Lex/TextEncoding.cpp
index a5d8e5e7da0b1..00e939fe8c6b5 100644
--- a/clang/lib/Lex/TextEncoding.cpp
+++ b/clang/lib/Lex/TextEncoding.cpp
@@ -35,7 +35,7 @@ TextEncoding::setConvertersFromOptions(TextEncoding &TEC,
return std::error_code();
ErrorOr<TextEncodingConverter> ErrorOrConverter =
llvm::TextEncodingConverter::create(UTF8, TEC.LiteralEncoding);
- if (ErrorOrConverter)
+ if (ErrorOrConverter) {
TEC.ToLiteralEncodingConverter =
new TextEncodingConverter(std::move(*ErrorOrConverter));
TInfo.ExecStrConverter = TEC.ToLiteralEncodingConverter;
More information about the llvm-branch-commits
mailing list