[llvm-branch-commits] [clang] Convert to exec-charset inside getPredefinedStringLiteralFromCache (PR #196569)

Abhina Sree via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jun 8 11:48:25 PDT 2026


https://github.com/abhina-sree updated https://github.com/llvm/llvm-project/pull/196569

>From d3f8b79f2df7c20b8a06eea7dba18f3d5b016f88 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Fri, 8 May 2026 12:20:45 -0400
Subject: [PATCH 1/2] convert to exec-charset inside
 getPredefinedStringLiteralFromCache, test __builtin_FILE()

---
 clang/include/clang/Basic/TargetInfo.h |  2 ++
 clang/lib/AST/ASTContext.cpp           | 10 ++++++++++
 clang/lib/Basic/TargetInfo.cpp         |  3 +++
 clang/lib/Lex/TextEncoding.cpp         |  5 +++--
 clang/test/CodeGen/systemz-charset.cpp |  4 ++++
 5 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 909bde840d3fa..3424cfc7ca7ac 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -326,6 +326,8 @@ class TargetInfo : public TransferrableTargetInfo,
 
   llvm::TextEncodingConverter *FormatStrConverter;
 
+  llvm::TextEncodingConverter *ExecStrConverter;
+
   /// Retrieve the target options.
   TargetOptions &getTargetOpts() const {
     assert(TargetOpts && "Missing target options");
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index a401a7471e6fc..e30c24dd9e0fd 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -13804,6 +13804,16 @@ ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const {
         *this, Key, StringLiteralKind::Ordinary,
         /*Pascal*/ false, getStringLiteralArrayType(CharTy, Key.size()),
         SourceLocation());
+
+  llvm::TextEncodingConverter *Converter = getTargetInfo().ExecStrConverter;
+  if (Converter) {
+    SmallString<128> Converted;
+    Converter->convert(Result->getString(), Converted);
+    Result = StringLiteral::Create(
+        *this, Converted, StringLiteralKind::Ordinary, /*Pascal*/ false,
+        getStringLiteralArrayType(CharTy, Converted.size()), SourceLocation());
+  }
+
   return Result;
 }
 
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 83132c2370ed4..b6f2a599ebb6e 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -197,6 +197,9 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
 
   FormatStrConverter = new llvm::TextEncodingConverter(
       std::move(*llvm::TextEncodingConverter::createNoopConverter()));
+
+  ExecStrConverter = new llvm::TextEncodingConverter(
+      std::move(*llvm::TextEncodingConverter::createNoopConverter()));
 }
 
 // Out of line virtual dtor for TargetInfo.
diff --git a/clang/lib/Lex/TextEncoding.cpp b/clang/lib/Lex/TextEncoding.cpp
index 1edd1e7ff1d83..71e10c3ec9cd0 100644
--- a/clang/lib/Lex/TextEncoding.cpp
+++ b/clang/lib/Lex/TextEncoding.cpp
@@ -35,10 +35,11 @@ TextEncoding::setConvertersFromOptions(TextEncoding &TEC,
     return std::error_code();
   ErrorOr<TextEncodingConverter> ErrorOrConverter =
       llvm::TextEncodingConverter::create(UTF8, TEC.ExecEncoding);
-  if (ErrorOrConverter)
+  if (ErrorOrConverter) {
     TEC.ToExecEncodingConverter =
         new TextEncodingConverter(std::move(*ErrorOrConverter));
-  else
+    TInfo.ExecStrConverter = TEC.ToExecEncodingConverter;
+  } else
     return ErrorOrConverter.getError();
 
   ErrorOrConverter = llvm::TextEncodingConverter::create(
diff --git a/clang/test/CodeGen/systemz-charset.cpp b/clang/test/CodeGen/systemz-charset.cpp
index 59c4ad550cd94..34571a25f0e2c 100644
--- a/clang/test/CodeGen/systemz-charset.cpp
+++ b/clang/test/CodeGen/systemz-charset.cpp
@@ -72,3 +72,7 @@ const char16_t *UnicodeUCNString16 = u"\u00E2\u00AC\U000000DF";
 const char32_t *UnicodeUCNString32 = U"\u00E2\u00AC\U000000DF";
 //CHECK: [4 x i32] [i32 226, i32 172, i32 223, i32 0]
 //CHECK=UTF8: [4 x i32] [i32 226, i32 172, i32 223, i32 0]
+
+const char *file = __builtin_FILE();
+//CHECK: {{.*}}\A2\A8\A2\A3\85\94\A9`\83\88\81\99\A2\85\A3K\83\97\97\00"
+//CHECK-UTF8: {{.*}}systemz-charset.cpp\00"

>From 326b4058ce4a6737ac0e8260f33b6103737d30ab Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Fri, 22 May 2026 08:51:37 -0400
Subject: [PATCH 2/2] Convert the key before cache lookup to prevent encoding
 differences

---
 clang/lib/AST/ASTContext.cpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index e30c24dd9e0fd..a936fdb392241 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -13798,6 +13798,15 @@ QualType ASTContext::getStringLiteralArrayType(QualType EltTy,
 
 StringLiteral *
 ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const {
+  // Run the key through the target's exec-charset converter before the cache
+  // lookup, so the cached literal is built from the converted bytes and
+  SmallString<128> ConvertedKey;
+  llvm::TextEncodingConverter *Converter = getTargetInfo().ExecStrConverter;
+  if (Converter) {
+    Converter->convert(Key, ConvertedKey);
+    Key = ConvertedKey;
+  }
+
   StringLiteral *&Result = StringLiteralCache[Key];
   if (!Result)
     Result = StringLiteral::Create(
@@ -13805,15 +13814,6 @@ ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const {
         /*Pascal*/ false, getStringLiteralArrayType(CharTy, Key.size()),
         SourceLocation());
 
-  llvm::TextEncodingConverter *Converter = getTargetInfo().ExecStrConverter;
-  if (Converter) {
-    SmallString<128> Converted;
-    Converter->convert(Result->getString(), Converted);
-    Result = StringLiteral::Create(
-        *this, Converted, StringLiteralKind::Ordinary, /*Pascal*/ false,
-        getStringLiteralArrayType(CharTy, Converted.size()), SourceLocation());
-  }
-
   return Result;
 }
 



More information about the llvm-branch-commits mailing list