[llvm-branch-commits] [clang] Add ParserConversionAction (PR #169803)
Abhina Sree via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jun 17 06:52:23 PDT 2026
https://github.com/abhina-sree updated https://github.com/llvm/llvm-project/pull/169803
>From 61be13de8d76bfeaddc5e6ff017ade6f1f4b22a7 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Fri, 8 May 2026 12:17:22 -0400
Subject: [PATCH 1/5] add ParserConversionAction, do not translate unevaluated
strings
---
clang/include/clang/Parse/Parser.h | 1 +
clang/include/clang/Sema/Sema.h | 4 +++-
clang/lib/Parse/ParseDecl.cpp | 10 ++++++++++
clang/lib/Parse/ParseDeclCXX.cpp | 2 ++
clang/lib/Parse/ParseExpr.cpp | 6 +++---
clang/lib/Parse/Parser.cpp | 4 ++++
clang/lib/Sema/SemaExpr.cpp | 12 ++++++------
clang/test/CodeGen/systemz-charset-diag.cpp | 8 ++++++++
clang/test/CodeGen/systemz-charset.c | 15 +++++++++++++++
9 files changed, 52 insertions(+), 10 deletions(-)
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index c6c492b4980af..b441998e54040 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -5715,6 +5715,7 @@ class Parser : public CodeCompletionHandler {
bool Finished;
};
ObjCImplParsingDataRAII *CurParsedObjCImpl;
+ ConversionAction ParserConversionAction;
/// StashAwayMethodOrFunctionBodyTokens - Consume the tokens and store them
/// for later parsing.
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index b8d760e7e0975..d54e4ce19166a 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -55,6 +55,7 @@
#include "clang/Basic/TemplateKinds.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Basic/TypeTraits.h"
+#include "clang/Lex/LiteralConverter.h"
#include "clang/Sema/AnalysisBasedWarnings.h"
#include "clang/Sema/Attr.h"
#include "clang/Sema/CleanupInfo.h"
@@ -7374,7 +7375,8 @@ class Sema final : public SemaBase {
/// from multiple tokens. However, the common case is that StringToks points
/// to one string.
ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks,
- Scope *UDLScope = nullptr);
+ Scope *UDLScope = nullptr,
+ ConversionAction Action = CA_ToExecEncoding);
ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks);
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 405dddf7991b4..97e0721c02b1b 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -564,6 +564,9 @@ unsigned Parser::ParseAttributeArgsCommon(
nullptr,
Sema::ExpressionEvaluationContextRecord::EK_AttrArgument);
+ SaveAndRestore<ConversionAction> SavedTranslationState(
+ ParserConversionAction, CA_NoConversion);
+
ExprResult ArgExpr = ParseAssignmentExpression();
if (ArgExpr.isInvalid()) {
SkipUntil(tok::r_paren, StopAtSemi);
@@ -644,6 +647,9 @@ void Parser::ParseGNUAttributeArgs(
ParsedAttr::Kind AttrKind =
ParsedAttr::getParsedKind(AttrName, ScopeName, Form.getSyntax());
+ SaveAndRestore<ConversionAction> SavedTranslationState(ParserConversionAction,
+ CA_NoConversion);
+
if (AttrKind == ParsedAttr::AT_Availability) {
ParseAvailabilityAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc, ScopeName,
ScopeLoc, Form);
@@ -723,6 +729,9 @@ unsigned Parser::ParseClangAttributeArgs(
ParsedAttr::Kind AttrKind =
ParsedAttr::getParsedKind(AttrName, ScopeName, Form.getSyntax());
+ SaveAndRestore<ConversionAction> SavedTranslationState(ParserConversionAction,
+ CA_NoConversion);
+
switch (AttrKind) {
default:
return ParseAttributeArgsCommon(AttrName, AttrNameLoc, Attrs, EndLoc,
@@ -1546,6 +1555,7 @@ void Parser::ParseExternalSourceSymbolAttribute(
SkipUntil(tok::comma, tok::r_paren, StopAtSemi | StopBeforeMatch);
continue;
}
+
if (Keyword == Ident_language) {
if (HadLanguage) {
Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause)
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index 893989bd2398f..388cfa662068a 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -1001,6 +1001,8 @@ Decl *Parser::ParseStaticAssertDeclaration(SourceLocation &DeclEnd) {
return nullptr;
}
} else if (tokenIsLikeStringLiteral(Tok, getLangOpts())) {
+ SaveAndRestore<ConversionAction> SavedTranslationState(
+ ParserConversionAction, CA_NoConversion);
AssertMessage = ParseUnevaluatedStringLiteralExpression();
} else {
Diag(Tok, diag::err_expected_string_literal)
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 2987d32d6e0d2..f8855d06fa343 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -3060,9 +3060,9 @@ ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral,
}
// Pass the set of string tokens, ready for concatenation, to the actions.
- return Actions.ActOnStringLiteral(StringToks,
- AllowUserDefinedLiteral ? getCurScope()
- : nullptr);
+ return Actions.ActOnStringLiteral(
+ StringToks, AllowUserDefinedLiteral ? getCurScope() : nullptr,
+ ParserConversionAction);
}
ExprResult Parser::ParseGenericSelectionExpression() {
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index 5e1fd4df1a3f0..7ac5e0a36d60e 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -70,6 +70,8 @@ Parser::Parser(Preprocessor &pp, Sema &actions, bool skipFunctionBodies)
NumCachedScopes = 0;
CurParsedObjCImpl = nullptr;
+ ParserConversionAction = CA_ToExecEncoding;
+
// Add #pragma handlers. These are removed and destroyed in the
// destructor.
initializePragmaHandlers();
@@ -1551,6 +1553,8 @@ void Parser::ParseKNRParamDeclarations(Declarator &D) {
}
ExprResult Parser::ParseAsmStringLiteral(bool ForAsmLabel) {
+ SaveAndRestore<ConversionAction> SavedTranslationState(ParserConversionAction,
+ CA_NoConversion);
ExprResult AsmString;
if (isTokenStringLiteral()) {
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index eea63e2497e06..089fdc5c5b6cc 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -2159,8 +2159,8 @@ ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks) {
if (getLangOpts().MicrosoftExt)
StringToks = ExpandedToks = ExpandFunctionLocalPredefinedMacros(StringToks);
- StringLiteralParser Literal(StringToks, PP,
- StringLiteralEvalMethod::Unevaluated);
+ StringLiteralParser Literal(
+ StringToks, PP, StringLiteralEvalMethod::Unevaluated, CA_NoConversion);
if (Literal.hadError)
return ExprError();
@@ -2231,8 +2231,8 @@ Sema::ExpandFunctionLocalPredefinedMacros(ArrayRef<Token> Toks) {
return ExpandedToks;
}
-ExprResult
-Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
+ExprResult Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope,
+ ConversionAction Action) {
assert(!StringToks.empty() && "Must have at least one string!");
// StringToks needs backing storage as it doesn't hold array elements itself
@@ -2240,8 +2240,8 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
if (getLangOpts().MicrosoftExt)
StringToks = ExpandedToks = ExpandFunctionLocalPredefinedMacros(StringToks);
- StringLiteralParser Literal(
- StringToks, PP, StringLiteralEvalMethod::Evaluated, CA_ToExecEncoding);
+ StringLiteralParser Literal(StringToks, PP,
+ StringLiteralEvalMethod::Evaluated, Action);
if (Literal.hadError)
return ExprError();
diff --git a/clang/test/CodeGen/systemz-charset-diag.cpp b/clang/test/CodeGen/systemz-charset-diag.cpp
index 5b398b4b58af6..08e6945c484f3 100644
--- a/clang/test/CodeGen/systemz-charset-diag.cpp
+++ b/clang/test/CodeGen/systemz-charset-diag.cpp
@@ -1,3 +1,11 @@
// RUN: %clang_cc1 -triple s390x-none-zos -fexec-charset IBM-1047 %s -std=c++17 -emit-llvm -o - -verify
const char* Computer = "🖥️"; // expected-error-re {{conversion to execution encoding failed: {{.*}}}}
+
+static_assert(false, "Error string"); // expected-error {{static assertion failed: Error string}}
+
+[[deprecated("message")]] void test_deprecated() {return;} // expected-note {{'test_deprecated' has been explicitly marked deprecated here}}
+
+int main() {
+ test_deprecated(); // expected-warning {{'test_deprecated' is deprecated: message}}
+}
diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c
index 897b9d2eeefa1..5279b780531c3 100644
--- a/clang/test/CodeGen/systemz-charset.c
+++ b/clang/test/CodeGen/systemz-charset.c
@@ -56,3 +56,18 @@ const char *Unicode = "ÿ";
// RUN: not %clang_cc1 -fexec-charset invalid %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
// CHECK-ERROR: error: failed to set fexec-charset to 'invalid'
+void test1() {
+ printf(__FUNCTION__);
+}
+//CHECK: @__FUNCTION__.test1 = private unnamed_addr constant [6 x i8] c"\A3\85\A2\A3\F1\00"
+
+#define HELLO "Hello "
+#define WORLD "World!"
+#define HELLO_WORLD HELLO WORLD
+const char* hello_macro = HELLO;
+//CHECK: c"\C8\85\93\93\96@\00"
+//CHECK-UTF8: c"Hello \00"
+
+const char* preprocessor_concatenation = HELLO_WORLD;
+//CHECK: c"\C8\85\93\93\96@\E6\96\99\93\84Z\00"
+//CHECK-UTF8: c"Hello World!\00"
>From 9c950e0ffc3e3e51b34bdec451f08b5709ca60bd Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Fri, 8 May 2026 12:29:23 -0400
Subject: [PATCH 2/5] Remove old include
---
clang/include/clang/Sema/Sema.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index d54e4ce19166a..aecd0d1c2f5dd 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -55,7 +55,6 @@
#include "clang/Basic/TemplateKinds.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Basic/TypeTraits.h"
-#include "clang/Lex/LiteralConverter.h"
#include "clang/Sema/AnalysisBasedWarnings.h"
#include "clang/Sema/Attr.h"
#include "clang/Sema/CleanupInfo.h"
>From fb0f71d782b7d0ce0131cc5fbaec3c7c88b0ec56 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Mon, 11 May 2026 09:27:48 -0400
Subject: [PATCH 3/5] Fix build failure
---
clang/include/clang/Sema/Sema.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index aecd0d1c2f5dd..5d00b0c94daa3 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -55,6 +55,7 @@
#include "clang/Basic/TemplateKinds.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Basic/TypeTraits.h"
+#include "clang/Lex/TextEncodingConfig.h"
#include "clang/Sema/AnalysisBasedWarnings.h"
#include "clang/Sema/Attr.h"
#include "clang/Sema/CleanupInfo.h"
>From 53ef2d19bd5b3d0d767d24786e00bac0c91eee87 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Tue, 12 May 2026 08:07:08 -0400
Subject: [PATCH 4/5] fix CI
---
clang/test/CodeGen/systemz-charset.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c
index 5279b780531c3..78ae3353224af 100644
--- a/clang/test/CodeGen/systemz-charset.c
+++ b/clang/test/CodeGen/systemz-charset.c
@@ -1,6 +1,8 @@
// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset IBM-1047 -o - | FileCheck %s
// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset UTF-8 -DIBM1047_ONLY=1 -o - | FileCheck %s --check-prefix=CHECK-UTF8
+int printf(char const *, ...);
+
const char *UpperCaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
//CHECK: c"\C1\C2\C3\C4\C5\C6\C7\C8\C9\D1\D2\D3\D4\D5\D6\D7\D8\D9\E2\E3\E4\E5\E6\E7\E8\E9\00"
//CHECK-UTF8: c"ABCDEFGHIJKLMNOPQRSTUVWXYZ\00"
>From b4a09ebb25f9c4acab111a7de19748f4b2c540f0 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Tue, 12 May 2026 15:21:15 -0400
Subject: [PATCH 5/5] fix CI
---
clang/include/clang/AST/Expr.h | 6 ++++++
clang/include/clang/Sema/Sema.h | 2 +-
clang/lib/AST/Expr.cpp | 14 ++++++++++++++
clang/lib/Parse/ParseDecl.cpp | 1 -
clang/lib/Sema/SemaExpr.cpp | 5 +++--
5 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index b91bf4a5375fb..69ac328c8f0a7 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -28,6 +28,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SyncScope.h"
#include "clang/Basic/TypeTraits.h"
+#include "clang/Lex/TextEncoding.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
@@ -2066,6 +2067,11 @@ class PredefinedExpr final
return getIdentKindName(getIdentKind());
}
+ static std::string
+ ComputeNameAndTranslate(PredefinedIdentKind IK, const Decl *CurrentDecl,
+ TextEncoding &TE,
+ bool ForceElaboratedPrinting = false);
+
static std::string ComputeName(PredefinedIdentKind IK,
const Decl *CurrentDecl,
bool ForceElaboratedPrinting = false);
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 5d00b0c94daa3..1db607b657a84 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -55,7 +55,7 @@
#include "clang/Basic/TemplateKinds.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Basic/TypeTraits.h"
-#include "clang/Lex/TextEncodingConfig.h"
+#include "clang/Lex/TextEncoding.h"
#include "clang/Sema/AnalysisBasedWarnings.h"
#include "clang/Sema/Attr.h"
#include "clang/Sema/CleanupInfo.h"
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 90747be4208e1..9e8e0e45c3c83 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -673,6 +673,20 @@ StringRef PredefinedExpr::getIdentKindName(PredefinedIdentKind IK) {
llvm_unreachable("Unknown ident kind for PredefinedExpr");
}
+std::string PredefinedExpr::ComputeNameAndTranslate(
+ PredefinedIdentKind IK, const Decl *CurrentDecl, TextEncoding &TE,
+ bool ForceElaboratedPrinting) {
+ using namespace clang::charinfo;
+ std::string Result = ComputeName(IK, CurrentDecl, ForceElaboratedPrinting);
+ llvm::TextEncodingConverter *Converter = TE.getConverter(CA_ToExecEncoding);
+ if (Converter) {
+ SmallString<128> Converted;
+ Converter->convert(Result, Converted);
+ Result = std::string(Converted);
+ }
+ return Result;
+}
+
// FIXME: Maybe this should use DeclPrinter with a special "print predefined
// expr" policy instead.
std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 97e0721c02b1b..3aa41ebc05397 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -1555,7 +1555,6 @@ void Parser::ParseExternalSourceSymbolAttribute(
SkipUntil(tok::comma, tok::r_paren, StopAtSemi | StopBeforeMatch);
continue;
}
-
if (Keyword == Ident_language) {
if (HadLanguage) {
Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause)
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 089fdc5c5b6cc..eac281b523862 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3636,8 +3636,9 @@ ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc,
// the string.
bool ForceElaboratedPrinting =
IK == PredefinedIdentKind::Function && getLangOpts().MSVCCompat;
- auto Str =
- PredefinedExpr::ComputeName(IK, currentDecl, ForceElaboratedPrinting);
+ auto Str = PredefinedExpr::ComputeNameAndTranslate(
+ IK, currentDecl, getPreprocessor().getTextEncoding(),
+ ForceElaboratedPrinting);
unsigned Length = Str.length();
llvm::APInt LengthI(32, Length + 1);
More information about the llvm-branch-commits
mailing list