[clang] [llvm] Enable fexec-charset option (PR #138895)
Abhina Sree via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 17 09:18:29 PDT 2025
https://github.com/abhina-sree updated https://github.com/llvm/llvm-project/pull/138895
>From 142119ec8a3ca155d37131cd2d4a108a83ab5884 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Wed, 7 May 2025 11:26:59 -0400
Subject: [PATCH 1/3] This patch enables the fexec-charset option to control
the execution charset of string literals. The internal charset is always
UTF-8; the system charset, which the driver also uses as the default execution
charset, is IBM-1047 on z/OS and UTF-8 on all other platforms.
(cherry picked from commit 0295d0da4db8b8fcd54084dc6ae95d8b0bbf45d9)
(cherry picked from commit e379f6cb9d063cb78c6b48b0e0a8d9f241958f89)
---
clang/docs/LanguageExtensions.rst | 3 +-
clang/include/clang/Basic/LangOptions.h | 3 +
clang/include/clang/Basic/TokenKinds.h | 7 ++
clang/include/clang/Driver/Options.td | 5 +
clang/include/clang/Lex/LiteralConverter.h | 36 ++++++
clang/include/clang/Lex/LiteralSupport.h | 19 +--
clang/include/clang/Lex/Preprocessor.h | 3 +
clang/lib/Driver/ToolChains/Clang.cpp | 17 ++-
clang/lib/Frontend/CompilerInstance.cpp | 4 +
clang/lib/Frontend/InitPreprocessor.cpp | 12 +-
clang/lib/Lex/CMakeLists.txt | 1 +
clang/lib/Lex/LiteralConverter.cpp | 69 +++++++++++
clang/lib/Lex/LiteralSupport.cpp | 133 +++++++++++++++++----
clang/test/CodeGen/systemz-charset.c | 35 ++++++
clang/test/CodeGen/systemz-charset.cpp | 46 +++++++
clang/test/Driver/cl-options.c | 7 +-
clang/test/Driver/clang_f_opts.c | 12 +-
clang/test/Preprocessor/init-s390x.c | 1 +
llvm/include/llvm/TargetParser/Triple.h | 3 +
llvm/lib/TargetParser/Triple.cpp | 7 ++
20 files changed, 375 insertions(+), 48 deletions(-)
create mode 100644 clang/include/clang/Lex/LiteralConverter.h
create mode 100644 clang/lib/Lex/LiteralConverter.cpp
create mode 100644 clang/test/CodeGen/systemz-charset.c
create mode 100644 clang/test/CodeGen/systemz-charset.cpp
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 6bb99c757cd19..960877389ab36 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -420,8 +420,7 @@ Builtin Macros
``__clang_literal_encoding__``
Defined to a narrow string literal that represents the current encoding of
narrow string literals, e.g., ``"hello"``. This macro typically expands to
- "UTF-8" (but may change in the future if the
- ``-fexec-charset="Encoding-Name"`` option is implemented.)
+ the text encoding given by ``-fexec-charset``, or to the system charset when that option is not specified.
``__clang_wide_literal_encoding__``
Defined to a narrow string literal that represents the current encoding of
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 260a7537edb9d..482e621b04ba6 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -565,6 +565,9 @@ class LangOptions : public LangOptionsBase {
bool AtomicFineGrainedMemory = false;
bool AtomicIgnoreDenormalMode = false;
+ /// Name of the exec charset to convert the internal charset to.
+ std::string ExecCharset;
+
LangOptions();
/// Set language defaults for the given input language and
diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h
index d84f3598cbf33..bdf95b149fc35 100644
--- a/clang/include/clang/Basic/TokenKinds.h
+++ b/clang/include/clang/Basic/TokenKinds.h
@@ -111,6 +111,13 @@ inline bool isLiteral(TokenKind K) {
return isInLiteralRange;
}
+/// Return true if this is a utf literal kind.
+inline bool isUTFLiteral(TokenKind K) {
+ return K == tok::utf8_char_constant || K == tok::utf8_string_literal ||
+ K == tok::utf16_char_constant || K == tok::utf16_string_literal ||
+ K == tok::utf32_char_constant || K == tok::utf32_string_literal;
+}
+
/// Return true if this is any of tok::annot_* kinds.
bool isAnnotation(TokenKind K);
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 7ae153deb9a55..a960283d418aa 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7467,6 +7467,11 @@ let Visibility = [CC1Option, CC1AsOption, FC1Option] in {
def tune_cpu : Separate<["-"], "tune-cpu">,
HelpText<"Tune for a specific cpu type">,
MarshallingInfoString<TargetOpts<"TuneCPU">>;
+def fexec_charset : Separate<["-"], "fexec-charset">, MetaVarName<"<charset>">,
+ HelpText<"Set the execution <charset> for string and character literals. "
+ "Supported character encodings include ISO8859-1, UTF-8, IBM-1047 "
+ "and those supported by the host icu or iconv library.">,
+ MarshallingInfoString<LangOpts<"ExecCharset">>;
def target_cpu : Separate<["-"], "target-cpu">,
HelpText<"Target a specific cpu type">,
MarshallingInfoString<TargetOpts<"CPU">>;
diff --git a/clang/include/clang/Lex/LiteralConverter.h b/clang/include/clang/Lex/LiteralConverter.h
new file mode 100644
index 0000000000000..999b2c146930f
--- /dev/null
+++ b/clang/include/clang/Lex/LiteralConverter.h
@@ -0,0 +1,36 @@
+//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_LITERALCONVERTER_H
+#define LLVM_CLANG_LEX_LITERALCONVERTER_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/TextEncoding.h"
+
+enum ConversionAction { NoConversion, ToSystemCharset, ToExecCharset };
+
+class LiteralConverter {
+ llvm::StringRef InternalCharset;
+ llvm::StringRef SystemCharset;
+ llvm::StringRef ExecCharset;
+ llvm::StringMap<llvm::TextEncodingConverter> TextEncodingConverters;
+
+public:
+ llvm::TextEncodingConverter *getConverter(const char *Codepage);
+ llvm::TextEncodingConverter *getConverter(ConversionAction Action);
+ llvm::TextEncodingConverter *createAndInsertCharConverter(const char *To);
+ void setConvertersFromOptions(const clang::LangOptions &Opts,
+ const clang::TargetInfo &TInfo,
+ clang::DiagnosticsEngine &Diags);
+};
+
+#endif
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index ea5f63bc20399..eaa2016c6a888 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -17,12 +17,13 @@
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/LiteralConverter.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
-
+#include "llvm/Support/TextEncoding.h"
namespace clang {
class DiagnosticsEngine;
@@ -233,6 +234,7 @@ class StringLiteralParser {
const LangOptions &Features;
const TargetInfo &Target;
DiagnosticsEngine *Diags;
+ LiteralConverter *LiteralConv;
unsigned MaxTokenLength;
unsigned SizeBound;
@@ -246,18 +248,19 @@ class StringLiteralParser {
StringLiteralEvalMethod EvalMethod;
public:
- StringLiteralParser(ArrayRef<Token> StringToks, Preprocessor &PP,
- StringLiteralEvalMethod StringMethod =
- StringLiteralEvalMethod::Evaluated);
+ StringLiteralParser(
+ ArrayRef<Token> StringToks, Preprocessor &PP,
+ StringLiteralEvalMethod StringMethod = StringLiteralEvalMethod::Evaluated,
+ ConversionAction Action = ToExecCharset);
StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm,
const LangOptions &features, const TargetInfo &target,
DiagnosticsEngine *diags = nullptr)
: SM(sm), Features(features), Target(target), Diags(diags),
- MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
- ResultPtr(ResultBuf.data()),
+ LiteralConv(nullptr), MaxTokenLength(0), SizeBound(0), CharByteWidth(0),
+ Kind(tok::unknown), ResultPtr(ResultBuf.data()),
EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false),
Pascal(false) {
- init(StringToks);
+ init(StringToks, NoConversion);
}
bool hadError;
@@ -305,7 +308,7 @@ class StringLiteralParser {
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
private:
- void init(ArrayRef<Token> StringToks);
+ void init(ArrayRef<Token> StringToks, ConversionAction Action);
bool CopyStringFragment(const Token &Tok, const char *TokBegin,
StringRef Fragment);
void DiagnoseLexingError(SourceLocation Loc);
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 39754847a93e4..8dd27bc414f80 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -25,6 +25,7 @@
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Lexer.h"
+#include "clang/Lex/LiteralConverter.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/ModuleMap.h"
@@ -163,6 +164,7 @@ class Preprocessor {
std::unique_ptr<ScratchBuffer> ScratchBuf;
HeaderSearch &HeaderInfo;
ModuleLoader &TheModuleLoader;
+ LiteralConverter LiteralConv;
/// External source of macros.
ExternalPreprocessorSource *ExternalSource;
@@ -1235,6 +1237,7 @@ class Preprocessor {
SelectorTable &getSelectorTable() { return Selectors; }
Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
+ LiteralConverter &getLiteralConverter() { return LiteralConv; }
void setExternalSource(ExternalPreprocessorSource *Source) {
ExternalSource = Source;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a7310ba2da061..dbceaf8f959bb 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -49,6 +49,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/TextEncoding.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/TargetParser/ARMTargetParserCommon.h"
@@ -7416,12 +7417,20 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
<< value;
}
- // -fexec_charset=UTF-8 is default. Reject others
+ // Set the default fexec-charset as the system charset.
+ CmdArgs.push_back("-fexec-charset");
+ CmdArgs.push_back(Args.MakeArgString(Triple.getSystemCharset()));
if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
StringRef value = execCharset->getValue();
- if (!value.equals_insensitive("utf-8"))
- D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args)
- << value;
+ llvm::ErrorOr<llvm::TextEncodingConverter> ErrorOrConverter =
+ llvm::TextEncodingConverter::create("UTF-8", value.data());
+ if (ErrorOrConverter) {
+ CmdArgs.push_back("-fexec-charset");
+ CmdArgs.push_back(Args.MakeArgString(value));
+ } else {
+ D.Diag(diag::err_drv_invalid_value)
+ << execCharset->getAsString(Args) << value;
+ }
}
RenderDiagnosticsOptions(D, Args, CmdArgs);
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 584436665622d..41dd377c4c1e1 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -32,6 +32,7 @@
#include "clang/Frontend/Utils.h"
#include "clang/Frontend/VerifyDiagnosticConsumer.h"
#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LiteralConverter.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Sema/CodeCompleteConsumer.h"
@@ -543,6 +544,9 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) {
if (GetDependencyDirectives)
PP->setDependencyDirectivesGetter(*GetDependencyDirectives);
+
+ PP->getLiteralConverter().setConvertersFromOptions(getLangOpts(), getTarget(),
+ getDiagnostics());
}
std::string CompilerInstance::getSpecificModuleCachePath(StringRef ModuleHash) {
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index baad63179d89a..1e4030622165e 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1020,10 +1020,14 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
}
}
- // Macros to help identify the narrow and wide character sets
- // FIXME: clang currently ignores -fexec-charset=. If this changes,
- // then this may need to be updated.
- Builder.defineMacro("__clang_literal_encoding__", "\"UTF-8\"");
+ // Macros to help identify the narrow and wide character sets. This is set
+ // to fexec-charset. If fexec-charset is not specified, the default is the
+ // system charset.
+ if (!LangOpts.ExecCharset.empty())
+ Builder.defineMacro("__clang_literal_encoding__", LangOpts.ExecCharset);
+ else
+ Builder.defineMacro("__clang_literal_encoding__",
+ TI.getTriple().getSystemCharset());
if (TI.getTypeWidth(TI.getWCharType()) >= 32) {
// FIXME: 32-bit wchar_t signals UTF-32. This may change
// if -fwide-exec-charset= is ever supported.
diff --git a/clang/lib/Lex/CMakeLists.txt b/clang/lib/Lex/CMakeLists.txt
index f61737cd68021..9e38a1b8fbb44 100644
--- a/clang/lib/Lex/CMakeLists.txt
+++ b/clang/lib/Lex/CMakeLists.txt
@@ -12,6 +12,7 @@ add_clang_library(clangLex
InitHeaderSearch.cpp
Lexer.cpp
LexHLSLRootSignature.cpp
+ LiteralConverter.cpp
LiteralSupport.cpp
MacroArgs.cpp
MacroInfo.cpp
diff --git a/clang/lib/Lex/LiteralConverter.cpp b/clang/lib/Lex/LiteralConverter.cpp
new file mode 100644
index 0000000000000..b00f44a238ec0
--- /dev/null
+++ b/clang/lib/Lex/LiteralConverter.cpp
@@ -0,0 +1,69 @@
+//===--- LiteralConverter.cpp - Translator for String Literals -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/LiteralConverter.h"
+#include "clang/Basic/DiagnosticDriver.h"
+
+using namespace llvm;
+
+llvm::TextEncodingConverter *
+LiteralConverter::getConverter(const char *Codepage) {
+ auto Iter = TextEncodingConverters.find(Codepage);
+ if (Iter != TextEncodingConverters.end())
+ return &Iter->second;
+ return nullptr;
+}
+
+llvm::TextEncodingConverter *
+LiteralConverter::getConverter(ConversionAction Action) {
+ StringRef CodePage;
+ if (Action == ToSystemCharset)
+ CodePage = SystemCharset;
+ else if (Action == ToExecCharset)
+ CodePage = ExecCharset;
+ else
+ CodePage = InternalCharset;
+ return getConverter(CodePage.data());
+}
+
+llvm::TextEncodingConverter *
+LiteralConverter::createAndInsertCharConverter(const char *To) {
+ const char *From = InternalCharset.data();
+ llvm::TextEncodingConverter *Converter = getConverter(To);
+ if (Converter)
+ return Converter;
+
+ ErrorOr<TextEncodingConverter> ErrorOrConverter =
+ llvm::TextEncodingConverter::create(From, To);
+ if (!ErrorOrConverter)
+ return nullptr;
+ TextEncodingConverters.insert_or_assign(StringRef(To),
+ std::move(*ErrorOrConverter));
+ return getConverter(To);
+}
+
+void LiteralConverter::setConvertersFromOptions(
+ const clang::LangOptions &Opts, const clang::TargetInfo &TInfo,
+ clang::DiagnosticsEngine &Diags) {
+ using namespace llvm;
+ SystemCharset = TInfo.getTriple().getSystemCharset();
+ InternalCharset = "UTF-8";
+ ExecCharset = Opts.ExecCharset.empty() ? InternalCharset : Opts.ExecCharset;
+ // Create converter between internal and system charset
+ if (InternalCharset != SystemCharset)
+ createAndInsertCharConverter(SystemCharset.data());
+
+ // Create converter between internal and exec charset specified
+ // in fexec-charset option.
+ if (InternalCharset == ExecCharset)
+ return;
+ if (!createAndInsertCharConverter(ExecCharset.data())) {
+ Diags.Report(clang::diag::err_drv_invalid_value)
+ << "-fexec-charset" << ExecCharset;
+ }
+}
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 5b08d7f0efe5a..e6242b3af4730 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -134,7 +134,8 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
FullSourceLoc Loc, unsigned CharWidth,
DiagnosticsEngine *Diags,
const LangOptions &Features,
- StringLiteralEvalMethod EvalMethod) {
+ StringLiteralEvalMethod EvalMethod,
+ llvm::TextEncodingConverter *Converter) {
const char *EscapeBegin = ThisTokBuf;
bool Delimited = false;
bool EndDelimiterFound = false;
@@ -146,6 +147,8 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
// that would have been \", which would not have been the end of string.
unsigned ResultChar = *ThisTokBuf++;
char Escape = ResultChar;
+ bool Translate = true;
+ bool Invalid = false;
switch (ResultChar) {
// These map to themselves.
case '\\': case '\'': case '"': case '?': break;
@@ -186,6 +189,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
ResultChar = 11;
break;
case 'x': { // Hex escape.
+ Translate = false;
ResultChar = 0;
if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
Delimited = true;
@@ -249,6 +253,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
case '4': case '5': case '6': case '7': {
// Octal escapes.
--ThisTokBuf;
+ Translate = false;
ResultChar = 0;
// Octal escapes are a series of octal digits with maximum length 3.
@@ -334,6 +339,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
<< std::string(1, ResultChar);
break;
default:
+ Invalid = true;
if (!Diags)
break;
@@ -367,6 +373,15 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
HadError = true;
}
+ if (Translate && Converter) {
+ // Invalid escapes are written as '?' and then translated.
+ char ByteChar = Invalid ? '?' : ResultChar;
+ SmallString<8> ResultCharConv;
+ Converter->convert(StringRef(&ByteChar, 1), ResultCharConv);
+ assert(ResultCharConv.size() == 1 &&
+ "Char size increased after translation");
+ ResultChar = ResultCharConv[0];
+ }
return ResultChar;
}
@@ -1751,6 +1766,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
HadError = false;
Kind = kind;
+ LiteralConverter *LiteralConv = &PP.getLiteralConverter();
const char *TokBegin = begin;
@@ -1817,6 +1833,10 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
largest_character_for_kind = 0x7Fu;
}
+ llvm::TextEncodingConverter *Converter = nullptr;
+ if (!isUTFLiteral(Kind) && LiteralConv)
+ Converter = LiteralConv->getConverter(ToExecCharset);
+
while (begin != end) {
// Is this a span of non-escape characters?
if (begin[0] != '\\') {
@@ -1854,6 +1874,16 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
HadError = true;
PP.Diag(Loc, diag::err_character_too_large);
}
+ if (!HadError && Converter) {
+ assert(Kind != tok::wide_char_constant &&
+ "Wide character translation not supported");
+ char ByteChar = *tmp_out_start;
+ SmallString<1> ConvertedChar;
+ Converter->convert(StringRef(&ByteChar, 1), ConvertedChar);
+ assert(ConvertedChar.size() == 1 &&
+ "Char size increased after translation");
+ *tmp_out_start = ConvertedChar[0];
+ }
}
}
@@ -1861,16 +1891,35 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
}
// Is this a Universal Character Name escape?
if (begin[1] == 'u' || begin[1] == 'U' || begin[1] == 'N') {
- unsigned short UcnLen = 0;
- if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
- FullSourceLoc(Loc, PP.getSourceManager()),
- &PP.getDiagnostics(), PP.getLangOpts(), true)) {
- HadError = true;
- } else if (*buffer_begin > largest_character_for_kind) {
- HadError = true;
- PP.Diag(Loc, diag::err_character_too_large);
+ if (Converter == nullptr) {
+ unsigned short UcnLen = 0;
+ if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
+ FullSourceLoc(Loc, PP.getSourceManager()),
+ &PP.getDiagnostics(), PP.getLangOpts(), true)) {
+ HadError = true;
+ } else if (*buffer_begin > largest_character_for_kind) {
+ HadError = true;
+ PP.Diag(Loc, diag::err_character_too_large);
+ }
+ } else {
+ char Cp[8];
+ char *ResultPtr = Cp;
+ unsigned CharByteWidth = 1;
+ EncodeUCNEscape(TokBegin, begin, end, ResultPtr, HadError,
+ FullSourceLoc(Loc, PP.getSourceManager()),
+ CharByteWidth, &PP.getDiagnostics(), PP.getLangOpts());
+ if (!HadError) {
+ SmallString<8> CpConv;
+ Converter->convert(StringRef(Cp), CpConv);
+ if (CpConv.size() > 1) {
+ HadError = true;
+ PP.Diag(Loc, diag::err_character_too_large);
+ } else {
+ memcpy(Cp, CpConv.data(), CpConv.size());
+ *buffer_begin = *Cp;
+ }
+ }
}
-
++buffer_begin;
continue;
}
@@ -1879,7 +1928,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
ProcessCharEscape(TokBegin, begin, end, HadError,
FullSourceLoc(Loc, PP.getSourceManager()), CharWidth,
&PP.getDiagnostics(), PP.getLangOpts(),
- StringLiteralEvalMethod::Evaluated);
+ StringLiteralEvalMethod::Evaluated, nullptr);
*buffer_begin++ = result;
}
@@ -1989,16 +2038,18 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
///
StringLiteralParser::StringLiteralParser(ArrayRef<Token> StringToks,
Preprocessor &PP,
- StringLiteralEvalMethod EvalMethod)
+ StringLiteralEvalMethod EvalMethod,
+ ConversionAction Action)
: SM(PP.getSourceManager()), Features(PP.getLangOpts()),
Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
- MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
- ResultPtr(ResultBuf.data()), EvalMethod(EvalMethod), hadError(false),
- Pascal(false) {
- init(StringToks);
+ LiteralConv(&PP.getLiteralConverter()), MaxTokenLength(0), SizeBound(0),
+ CharByteWidth(0), Kind(tok::unknown), ResultPtr(ResultBuf.data()),
+ EvalMethod(EvalMethod), hadError(false), Pascal(false) {
+ init(StringToks, Action);
}
-void StringLiteralParser::init(ArrayRef<Token> StringToks){
+void StringLiteralParser::init(ArrayRef<Token> StringToks,
+ ConversionAction Action) {
// The literal token may have come from an invalid source location (e.g. due
// to a PCH error), in which case the token length will be 0.
if (StringToks.empty() || StringToks[0].getLength() < 2)
@@ -2090,6 +2141,10 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
SourceLocation UDSuffixTokLoc;
+ llvm::TextEncodingConverter *Converter = nullptr;
+ if (!isUTFLiteral(Kind) && LiteralConv)
+ Converter = LiteralConv->getConverter(Action);
+
for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
const char *ThisTokBuf = &TokenBuf[0];
// Get the spelling of the token, which eliminates trigraphs, etc. We know
@@ -2203,6 +2258,16 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
hadError = true;
+ if (!hadError && Converter) {
+ assert(Kind != tok::wide_string_literal &&
+ "Wide character translation not supported");
+ SmallString<256> CpConv;
+ int ResultLength = BeforeCRLF.size() * CharByteWidth;
+ char *Cp = ResultPtr - ResultLength;
+ Converter->convert(StringRef(Cp, ResultLength), CpConv);
+ memcpy(Cp, CpConv.data(), ResultLength);
+ ResultPtr = Cp + CpConv.size();
+ }
// Point into the \n inside the \r\n sequence and operate on the
// remaining portion of the literal.
RemainingTokenSpan = AfterCRLF.substr(1);
@@ -2237,26 +2302,45 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
++ThisTokBuf;
} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+ int Length = ThisTokBuf - InStart;
// Copy the character span over.
if (CopyStringFragment(StringToks[i], ThisTokBegin,
StringRef(InStart, ThisTokBuf - InStart)))
hadError = true;
+
+ if (!hadError && Converter) {
+ assert(Kind != tok::wide_string_literal &&
+ "Wide character translation not supported");
+ SmallString<256> CpConv;
+ int ResultLength = Length * CharByteWidth;
+ char *Cp = ResultPtr - ResultLength;
+ Converter->convert(StringRef(Cp, ResultLength), CpConv);
+ memcpy(Cp, CpConv.data(), ResultLength);
+ ResultPtr = Cp + CpConv.size();
+ }
continue;
}
// Is this a Universal Character Name escape?
if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U' ||
ThisTokBuf[1] == 'N') {
- EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
- ResultPtr, hadError,
+ char *Cp = ResultPtr;
+ EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, ResultPtr,
+ hadError,
FullSourceLoc(StringToks[i].getLocation(), SM),
CharByteWidth, Diags, Features);
+ if (!hadError && Converter) {
+ SmallString<8> CpConv;
+ Converter->convert(StringRef(Cp), CpConv);
+ memcpy(Cp, CpConv.data(), CpConv.size());
+ ResultPtr = Cp + CpConv.size();
+ }
continue;
}
// Otherwise, this is a non-UCN escape character. Process it.
- unsigned ResultChar =
- ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
- FullSourceLoc(StringToks[i].getLocation(), SM),
- CharByteWidth * 8, Diags, Features, EvalMethod);
+ unsigned ResultChar = ProcessCharEscape(
+ ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
+ FullSourceLoc(StringToks[i].getLocation(), SM), CharByteWidth * 8,
+ Diags, Features, EvalMethod, Converter);
if (CharByteWidth == 4) {
// FIXME: Make the type of the result buffer correct instead of
@@ -2454,7 +2538,8 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
} else {
ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
FullSourceLoc(Tok.getLocation(), SM), CharByteWidth * 8,
- Diags, Features, StringLiteralEvalMethod::Evaluated);
+ Diags, Features, StringLiteralEvalMethod::Evaluated,
+ nullptr);
--ByteNo;
}
assert(!HadError && "This method isn't valid on erroneous strings");
diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c
new file mode 100644
index 0000000000000..aab43157b1be4
--- /dev/null
+++ b/clang/test/CodeGen/systemz-charset.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset IBM-1047 -o - | FileCheck %s
+// RUN: %clang %s -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s
+
+const char *UpperCaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+// CHECK: c"\C1\C2\C3\C4\C5\C6\C7\C8\C9\D1\D2\D3\D4\D5\D6\D7\D8\D9\E2\E3\E4\E5\E6\E7\E8\E9\00"
+
+const char *LowerCaseLetters = "abcdefghijklmnopqrstuvwxyz";
+//CHECK: c"\81\82\83\84\85\86\87\88\89\91\92\93\94\95\96\97\98\99\A2\A3\A4\A5\A6\A7\A8\A9\00"
+
+const char *Digits = "0123456789";
+// CHECK: c"\F0\F1\F2\F3\F4\F5\F6\F7\F8\F9\00"
+
+const char *SpecialCharacters = " .<(+|&!$*);^-/,%%_>`:#@=";
+// CHECK: c"@KLMNOPZ[\\]^_`akllmnyz{|~\00"
+
+const char *EscapeCharacters = "\a\b\f\n\r\t\v\\\'\"\?";
+//CHECK: c"/\16\0C\15\0D\05\0B\E0}\7Fo\00"
+
+const char *InvalidEscape = "\y\z";
+//CHECK: c"oo\00"
+
+const char *HexCharacters = "\x12\x13\x14";
+//CHECK: c"\12\13\14\00"
+
+const char *OctalCharacters = "\141\142\143";
+//CHECK: c"abc\00"
+
+const char singleChar = 'a';
+//CHECK: i8 -127
+
+const char *UcnCharacters = "\u00E2\u00AC\U000000DF";
+//CHECK: c"B\B0Y\00"
+
+const char *Unicode = "ÿ";
+//CHECK: c"\DF\00"
diff --git a/clang/test/CodeGen/systemz-charset.cpp b/clang/test/CodeGen/systemz-charset.cpp
new file mode 100644
index 0000000000000..7e66407fd2ff1
--- /dev/null
+++ b/clang/test/CodeGen/systemz-charset.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang %s -std=c++17 -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s
+
+const char *RawString = R"(Hello\n)";
+//CHECK: c"\C8\85\93\93\96\E0\95\00"
+
+const char *MultiLineRawString = R"(
+Hello
+There)";
+//CHECK: c"\15\C8\85\93\93\96\15\E3\88\85\99\85\00"
+
+char UnicodeChar8 = u8'1';
+//CHECK: i8 49
+char16_t UnicodeChar16 = u'1';
+//CHECK: i16 49
+char32_t UnicodeChar32 = U'1';
+//CHECK: i32 49
+
+const char *EscapeCharacters8 = u8"\a\b\f\n\r\t\v\\\'\"\?";
+//CHECK: c"\07\08\0C\0A\0D\09\0B\\'\22?\00"
+
+const char16_t *EscapeCharacters16 = u"\a\b\f\n\r\t\v\\\'\"\?";
+//CHECK: [12 x i16] [i16 7, i16 8, i16 12, i16 10, i16 13, i16 9, i16 11, i16 92, i16 39, i16 34, i16 63, i16 0]
+
+const char32_t *EscapeCharacters32 = U"\a\b\f\n\r\t\v\\\'\"\?";
+//CHECK: [12 x i32] [i32 7, i32 8, i32 12, i32 10, i32 13, i32 9, i32 11, i32 92, i32 39, i32 34, i32 63, i32 0]
+
+const char *UnicodeString8 = u8"Hello";
+//CHECK: c"Hello\00"
+const char16_t *UnicodeString16 = u"Hello";
+//CHECK: [6 x i16] [i16 72, i16 101, i16 108, i16 108, i16 111, i16 0]
+const char32_t *UnicodeString32 = U"Hello";
+//CHECK: [6 x i32] [i32 72, i32 101, i32 108, i32 108, i32 111, i32 0]
+
+const char *UnicodeRawString8 = u8R"("Hello\")";
+//CHECK: c"\22Hello\\\22\00"
+const char16_t *UnicodeRawString16 = uR"("Hello\")";
+//CHECK: [9 x i16] [i16 34, i16 72, i16 101, i16 108, i16 108, i16 111, i16 92, i16 34, i16 0]
+const char32_t *UnicodeRawString32 = UR"("Hello\")";
+//CHECK: [9 x i32] [i32 34, i32 72, i32 101, i32 108, i32 108, i32 111, i32 92, i32 34, i32 0]
+
+const char *UnicodeUCNString8 = u8"\u00E2\u00AC\U000000DF";
+//CHECK: c"\C3\A2\C2\AC\C3\9F\00"
+const char16_t *UnicodeUCNString16 = u"\u00E2\u00AC\U000000DF";
+//CHECK: [4 x i16] [i16 226, i16 172, i16 223, i16 0]
+const char32_t *UnicodeUCNString32 = U"\u00E2\u00AC\U000000DF";
+//CHECK: [4 x i32] [i32 226, i32 172, i32 223, i32 0]
diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c
index 1b1169b71554a..69d7e255a807d 100644
--- a/clang/test/Driver/cl-options.c
+++ b/clang/test/Driver/cl-options.c
@@ -250,10 +250,11 @@
// RUN: not %clang_cl /source-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=source-charset-utf-16 %s
// source-charset-utf-16: invalid value 'utf-16' in '/source-charset:utf-16'
-// /execution-charset: should warn on everything except UTF-8.
-// RUN: not %clang_cl /execution-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-utf-16 %s
-// execution-charset-utf-16: invalid value 'utf-16' in '/execution-charset:utf-16'
+// /execution-charset: should warn on invalid charsets.
+// RUN: %clang_cl /execution-charset:invalid-charset -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-invalid %s
+// execution-charset-invalid: invalid value 'invalid-charset' in '/execution-charset:invalid-charset'
//
+
// RUN: %clang_cl /Umymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s
// RUN: %clang_cl /U mymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s
// U: "-U" "mymacro"
diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
index 765f9d6ae3212..3e8b232d3a6ef 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -232,8 +232,14 @@
// RUN: not %clang -### -S -finput-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s
// CHECK-INVALID-INPUT-CHARSET: error: invalid value 'iso-8859-1' in '-finput-charset=iso-8859-1'
-// RUN: not %clang -### -S -fexec-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-EXEC-CHARSET %s
-// CHECK-INVALID-EXEC-CHARSET: error: invalid value 'iso-8859-1' in '-fexec-charset=iso-8859-1'
+// RUN: %clang -### -S -fexec-charset=invalid-charset -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s
+// CHECK-INVALID-INPUT-CHARSET: error: invalid value 'invalid-charset' in '-fexec-charset=invalid-charset'
+
+// Test that we support the following exec charsets.
+// RUN: %clang -### -S -fexec-charset=UTF-8 -o /dev/null %s 2>&1 | FileCheck --check-prefix=INVALID %s
+// RUN: %clang -### -S -fexec-charset=ISO8859-1 -o /dev/null %s 2>&1 | FileCheck --check-prefix=INVALID %s
+// RUN: %clang -### -S -fexec-charset=IBM-1047 -o /dev/null %s 2>&1 | FileCheck --check-prefix=INVALID %s
+// INVALID-NOT: error: invalid value
// Test that we don't error on these.
// RUN: not %clang -### -S -Werror \
@@ -247,7 +253,7 @@
// RUN: -fident -fno-ident \
// RUN: -fimplicit-templates -fno-implicit-templates \
// RUN: -finput-charset=UTF-8 \
-// RUN: -fexec-charset=UTF-8 \
+// RUN: -fexec-charset=UTF-8 \
// RUN: -fivopts -fno-ivopts \
// RUN: -fnon-call-exceptions -fno-non-call-exceptions \
// RUN: -fpermissive -fno-permissive \
diff --git a/clang/test/Preprocessor/init-s390x.c b/clang/test/Preprocessor/init-s390x.c
index a8fbde46cbb75..9ff122def913f 100644
--- a/clang/test/Preprocessor/init-s390x.c
+++ b/clang/test/Preprocessor/init-s390x.c
@@ -206,4 +206,5 @@
// S390X-ZOS: #define __TOS_390__ 1
// S390X-ZOS: #define __TOS_MVS__ 1
// S390X-ZOS: #define __XPLINK__ 1
+// S390X-ZOS: #define __clang_literal_encoding__ IBM-1047
// S390X-ZOS-GNUXX: #define __wchar_t 1
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index dc8cd86d2a69a..21feed68d0b2e 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -506,6 +506,9 @@ class Triple {
/// For example, "fooos1.2.3" would return "1.2.3".
LLVM_ABI StringRef getEnvironmentVersionString() const;
+ /// getSystemCharset - Get the system charset of the triple.
+ StringRef getSystemCharset() const;
+
/// @}
/// @name Convenience Predicates
/// @{
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index 1068ce422d9d0..9e681926de9d7 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -1417,6 +1417,13 @@ StringRef Triple::getOSAndEnvironmentName() const {
return Tmp.split('-').second; // Strip second component
}
+// System charset on z/OS is IBM-1047 and UTF-8 otherwise
+StringRef Triple::getSystemCharset() const {
+ if (getOS() == llvm::Triple::ZOS)
+ return "IBM-1047";
+ return "UTF-8";
+}
+
static VersionTuple parseVersionFromName(StringRef Name) {
VersionTuple Version;
Version.tryParse(Name);
>From 6a2dec409e69bfee7deeb58ecadf96a2c0c7da4c Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Wed, 11 Jun 2025 11:22:40 -0400
Subject: [PATCH 2/3] replace StringMap with pointer
---
clang/docs/LanguageExtensions.rst | 5 +-
clang/include/clang/Basic/LangOptions.h | 4 +-
clang/include/clang/Driver/Options.td | 2 +-
clang/include/clang/Lex/LiteralConverter.h | 13 ++---
clang/include/clang/Lex/LiteralSupport.h | 2 +-
clang/lib/Driver/ToolChains/Clang.cpp | 8 +--
clang/lib/Frontend/InitPreprocessor.cpp | 6 +-
clang/lib/Lex/LiteralConverter.cpp | 66 ++++++++--------------
clang/lib/Lex/LiteralSupport.cpp | 2 +-
llvm/include/llvm/TargetParser/Triple.h | 4 +-
llvm/lib/TargetParser/Triple.cpp | 4 +-
11 files changed, 50 insertions(+), 66 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 960877389ab36..960c90ad80854 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -419,8 +419,9 @@ Builtin Macros
``__clang_literal_encoding__``
Defined to a narrow string literal that represents the current encoding of
- narrow string literals, e.g., ``"hello"``. This macro typically expands to
- the text encoding specified by -fexec-charset if specified, or the system charset.
+ narrow string literals, e.g., ``"hello"``. This macro expands to the text
+ encoding specified by ``-fexec-charset`` if any, or a system-specific default
+ otherwise: ``"IBM-1047"`` on z/OS and ``"UTF-8"`` on all other systems.
``__clang_wide_literal_encoding__``
Defined to a narrow string literal that represents the current encoding of
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 482e621b04ba6..346c45acb753b 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -565,8 +565,8 @@ class LangOptions : public LangOptionsBase {
bool AtomicFineGrainedMemory = false;
bool AtomicIgnoreDenormalMode = false;
- /// Name of the exec charset to convert the internal charset to.
- std::string ExecCharset;
+ /// Name of the execution encoding to convert the internal encoding to.
+ std::string ExecEncoding;
LangOptions();
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index a960283d418aa..81caba8f5c1fe 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7471,7 +7471,7 @@ def fexec_charset : Separate<["-"], "fexec-charset">, MetaVarName<"<charset>">,
HelpText<"Set the execution <charset> for string and character literals. "
"Supported character encodings include ISO8859-1, UTF-8, IBM-1047 "
"and those supported by the host icu or iconv library.">,
- MarshallingInfoString<LangOpts<"ExecCharset">>;
+ MarshallingInfoString<LangOpts<"ExecEncoding">>;
def target_cpu : Separate<["-"], "target-cpu">,
HelpText<"Target a specific cpu type">,
MarshallingInfoString<TargetOpts<"CPU">>;
diff --git a/clang/include/clang/Lex/LiteralConverter.h b/clang/include/clang/Lex/LiteralConverter.h
index 999b2c146930f..ee489bf6ce510 100644
--- a/clang/include/clang/Lex/LiteralConverter.h
+++ b/clang/include/clang/Lex/LiteralConverter.h
@@ -16,18 +16,17 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/TextEncoding.h"
-enum ConversionAction { NoConversion, ToSystemCharset, ToExecCharset };
+enum ConversionAction { NoConversion, ToSystemEncoding, ToExecEncoding };
class LiteralConverter {
- llvm::StringRef InternalCharset;
- llvm::StringRef SystemCharset;
- llvm::StringRef ExecCharset;
- llvm::StringMap<llvm::TextEncodingConverter> TextEncodingConverters;
+ llvm::StringRef InternalEncoding;
+ llvm::StringRef SystemEncoding;
+ llvm::StringRef ExecEncoding;
+ llvm::TextEncodingConverter *ToSystemEncodingConverter;
+ llvm::TextEncodingConverter *ToExecEncodingConverter;
public:
- llvm::TextEncodingConverter *getConverter(const char *Codepage);
llvm::TextEncodingConverter *getConverter(ConversionAction Action);
- llvm::TextEncodingConverter *createAndInsertCharConverter(const char *To);
void setConvertersFromOptions(const clang::LangOptions &Opts,
const clang::TargetInfo &TInfo,
clang::DiagnosticsEngine &Diags);
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index eaa2016c6a888..af0296912b8df 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -251,7 +251,7 @@ class StringLiteralParser {
StringLiteralParser(
ArrayRef<Token> StringToks, Preprocessor &PP,
StringLiteralEvalMethod StringMethod = StringLiteralEvalMethod::Evaluated,
- ConversionAction Action = ToExecCharset);
+ ConversionAction Action = ToExecEncoding);
StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm,
const LangOptions &features, const TargetInfo &target,
DiagnosticsEngine *diags = nullptr)
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index dbceaf8f959bb..46bb7c41c3b06 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7419,9 +7419,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// Set the default fexec-charset as the system charset.
CmdArgs.push_back("-fexec-charset");
- CmdArgs.push_back(Args.MakeArgString(Triple.getSystemCharset()));
- if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
- StringRef value = execCharset->getValue();
+ CmdArgs.push_back(Args.MakeArgString(Triple.getDefaultTextEncoding()));
+ if (Arg *execEncoding = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
+ StringRef value = execEncoding->getValue();
llvm::ErrorOr<llvm::TextEncodingConverter> ErrorOrConverter =
llvm::TextEncodingConverter::create("UTF-8", value.data());
if (ErrorOrConverter) {
@@ -7429,7 +7429,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(value));
} else {
D.Diag(diag::err_drv_invalid_value)
- << execCharset->getAsString(Args) << value;
+ << execEncoding->getAsString(Args) << value;
}
}
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 1e4030622165e..960fbee9307f2 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1023,11 +1023,11 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
// Macros to help identify the narrow and wide character sets. This is set
// to fexec-charset. If fexec-charset is not specified, the default is the
// system charset.
- if (!LangOpts.ExecCharset.empty())
- Builder.defineMacro("__clang_literal_encoding__", LangOpts.ExecCharset);
+ if (!LangOpts.ExecEncoding.empty())
+ Builder.defineMacro("__clang_literal_encoding__", LangOpts.ExecEncoding);
else
Builder.defineMacro("__clang_literal_encoding__",
- TI.getTriple().getSystemCharset());
+ TI.getTriple().getDefaultTextEncoding());
if (TI.getTypeWidth(TI.getWCharType()) >= 32) {
// FIXME: 32-bit wchar_t signals UTF-32. This may change
// if -fwide-exec-charset= is ever supported.
diff --git a/clang/lib/Lex/LiteralConverter.cpp b/clang/lib/Lex/LiteralConverter.cpp
index b00f44a238ec0..e9f8981336e8e 100644
--- a/clang/lib/Lex/LiteralConverter.cpp
+++ b/clang/lib/Lex/LiteralConverter.cpp
@@ -11,59 +11,43 @@
using namespace llvm;
-llvm::TextEncodingConverter *
-LiteralConverter::getConverter(const char *Codepage) {
- auto Iter = TextEncodingConverters.find(Codepage);
- if (Iter != TextEncodingConverters.end())
- return &Iter->second;
- return nullptr;
-}
-
llvm::TextEncodingConverter *
LiteralConverter::getConverter(ConversionAction Action) {
- StringRef CodePage;
- if (Action == ToSystemCharset)
- CodePage = SystemCharset;
- else if (Action == ToExecCharset)
- CodePage = ExecCharset;
+ if (Action == ToSystemEncoding)
+ return ToSystemEncodingConverter;
+ else if (Action == ToExecEncoding)
+ return ToExecEncodingConverter;
else
- CodePage = InternalCharset;
- return getConverter(CodePage.data());
-}
-
-llvm::TextEncodingConverter *
-LiteralConverter::createAndInsertCharConverter(const char *To) {
- const char *From = InternalCharset.data();
- llvm::TextEncodingConverter *Converter = getConverter(To);
- if (Converter)
- return Converter;
-
- ErrorOr<TextEncodingConverter> ErrorOrConverter =
- llvm::TextEncodingConverter::create(From, To);
- if (!ErrorOrConverter)
return nullptr;
- TextEncodingConverters.insert_or_assign(StringRef(To),
- std::move(*ErrorOrConverter));
- return getConverter(To);
}
void LiteralConverter::setConvertersFromOptions(
const clang::LangOptions &Opts, const clang::TargetInfo &TInfo,
clang::DiagnosticsEngine &Diags) {
using namespace llvm;
- SystemCharset = TInfo.getTriple().getSystemCharset();
- InternalCharset = "UTF-8";
- ExecCharset = Opts.ExecCharset.empty() ? InternalCharset : Opts.ExecCharset;
- // Create converter between internal and system charset
- if (InternalCharset != SystemCharset)
- createAndInsertCharConverter(SystemCharset.data());
+ InternalEncoding = "UTF-8";
+ SystemEncoding = TInfo.getTriple().getDefaultTextEncoding();
+ ExecEncoding =
+ Opts.ExecEncoding.empty() ? InternalEncoding : Opts.ExecEncoding;
+ // Create converter between internal and system encoding
+ if (InternalEncoding != SystemEncoding) {
+ ErrorOr<TextEncodingConverter> ErrorOrConverter =
+ llvm::TextEncodingConverter::create(InternalEncoding, SystemEncoding);
+ if (!ErrorOrConverter)
+ return;
+ ToSystemEncodingConverter =
+ new TextEncodingConverter(std::move(*ErrorOrConverter));
+ }
- // Create converter between internal and exec charset specified
+ // Create converter between internal and exec encoding specified
// in fexec-charset option.
- if (InternalCharset == ExecCharset)
+ if (InternalEncoding == ExecEncoding)
return;
- if (!createAndInsertCharConverter(ExecCharset.data())) {
+ ErrorOr<TextEncodingConverter> ErrorOrConverter =
+ llvm::TextEncodingConverter::create(InternalEncoding, ExecEncoding);
+ if (!ErrorOrConverter)
Diags.Report(clang::diag::err_drv_invalid_value)
- << "-fexec-charset" << ExecCharset;
- }
+ << "-fexec-charset" << ExecEncoding;
+ ToExecEncodingConverter =
+ new TextEncodingConverter(std::move(*ErrorOrConverter));
}
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index e6242b3af4730..982d6e0e73977 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -1835,7 +1835,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
llvm::TextEncodingConverter *Converter = nullptr;
if (!isUTFLiteral(Kind) && LiteralConv)
- Converter = LiteralConv->getConverter(ToExecCharset);
+ Converter = LiteralConv->getConverter(ToExecEncoding);
while (begin != end) {
// Is this a span of non-escape characters?
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 21feed68d0b2e..7932c600b8d81 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -506,8 +506,8 @@ class Triple {
/// For example, "fooos1.2.3" would return "1.2.3".
LLVM_ABI StringRef getEnvironmentVersionString() const;
- /// getSystemCharset - Get the system charset of the triple.
- StringRef getSystemCharset() const;
+ /// getDefaultTextEncoding - Get the default encoding of the triple.
+ StringRef getDefaultTextEncoding() const;
/// @}
/// @name Convenience Predicates
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index 9e681926de9d7..f0945e3a6db1d 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -1417,8 +1417,8 @@ StringRef Triple::getOSAndEnvironmentName() const {
return Tmp.split('-').second; // Strip second component
}
-// System charset on z/OS is IBM-1047 and UTF-8 otherwise
-StringRef Triple::getSystemCharset() const {
+// Default encoding on z/OS is IBM-1047 and UTF-8 otherwise
+StringRef Triple::getDefaultTextEncoding() const {
if (getOS() == llvm::Triple::ZOS)
return "IBM-1047";
return "UTF-8";
>From 9c5cc8c7f58e80bcf80f533ffbd10b3d551cd570 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Fri, 17 Oct 2025 12:14:09 -0400
Subject: [PATCH 3/3] address comments
---
clang/include/clang/Basic/TokenKinds.h | 7 ++++++-
clang/include/clang/Driver/Options.td | 4 ++--
clang/include/clang/Lex/LiteralConverter.h | 6 +++++-
clang/include/clang/Lex/LiteralSupport.h | 4 ++--
clang/lib/Driver/ToolChains/Clang.cpp | 2 +-
clang/lib/Frontend/InitPreprocessor.cpp | 2 +-
clang/lib/Lex/LiteralConverter.cpp | 6 +++---
clang/lib/Lex/LiteralSupport.cpp | 14 ++++++-------
clang/test/Driver/clang_f_opts.c | 24 ++++++++++++----------
llvm/include/llvm/TargetParser/Triple.h | 4 ++--
llvm/lib/TargetParser/Triple.cpp | 2 +-
11 files changed, 43 insertions(+), 32 deletions(-)
diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h
index bdf95b149fc35..27c3ecf7f4ae2 100644
--- a/clang/include/clang/Basic/TokenKinds.h
+++ b/clang/include/clang/Basic/TokenKinds.h
@@ -111,13 +111,18 @@ inline bool isLiteral(TokenKind K) {
return isInLiteralRange;
}
-/// Return true if this is a utf literal kind.
+/// Return true if this is a UTF literal kind.
inline bool isUTFLiteral(TokenKind K) {
return K == tok::utf8_char_constant || K == tok::utf8_string_literal ||
K == tok::utf16_char_constant || K == tok::utf16_string_literal ||
K == tok::utf32_char_constant || K == tok::utf32_string_literal;
}
+/// Return true if this is a wide literal kind.
+inline bool isWideLiteral(TokenKind K) {
+ return K == tok::wide_char_constant || K == tok::wide_string_literal;
+}
+
/// Return true if this is any of tok::annot_* kinds.
bool isAnnotation(TokenKind K);
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 81caba8f5c1fe..e6eee6ef609c8 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7469,8 +7469,8 @@ def tune_cpu : Separate<["-"], "tune-cpu">,
MarshallingInfoString<TargetOpts<"TuneCPU">>;
def fexec_charset : Separate<["-"], "fexec-charset">, MetaVarName<"<charset>">,
HelpText<"Set the execution <charset> for string and character literals. "
- "Supported character encodings include ISO8859-1, UTF-8, IBM-1047 "
- "and those supported by the host icu or iconv library.">,
+ "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, "
+ "and possibly those supported by ICU or the host iconv library.">,
MarshallingInfoString<LangOpts<"ExecEncoding">>;
def target_cpu : Separate<["-"], "target-cpu">,
HelpText<"Target a specific cpu type">,
diff --git a/clang/include/clang/Lex/LiteralConverter.h b/clang/include/clang/Lex/LiteralConverter.h
index ee489bf6ce510..6add678a2edc0 100644
--- a/clang/include/clang/Lex/LiteralConverter.h
+++ b/clang/include/clang/Lex/LiteralConverter.h
@@ -16,7 +16,11 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/TextEncoding.h"
-enum ConversionAction { NoConversion, ToSystemEncoding, ToExecEncoding };
+enum ConversionAction {
+ CA_NoConversion,
+ CA_ToSystemEncoding,
+ CA_ToExecEncoding
+};
class LiteralConverter {
llvm::StringRef InternalEncoding;
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index af0296912b8df..32ae829096592 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -251,7 +251,7 @@ class StringLiteralParser {
StringLiteralParser(
ArrayRef<Token> StringToks, Preprocessor &PP,
StringLiteralEvalMethod StringMethod = StringLiteralEvalMethod::Evaluated,
- ConversionAction Action = ToExecEncoding);
+ ConversionAction Action = CA_ToExecEncoding);
StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm,
const LangOptions &features, const TargetInfo &target,
DiagnosticsEngine *diags = nullptr)
@@ -260,7 +260,7 @@ class StringLiteralParser {
Kind(tok::unknown), ResultPtr(ResultBuf.data()),
EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false),
Pascal(false) {
- init(StringToks, NoConversion);
+ init(StringToks, CA_NoConversion);
}
bool hadError;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 46bb7c41c3b06..d49545dfca7c4 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7419,7 +7419,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// Set the default fexec-charset as the system charset.
CmdArgs.push_back("-fexec-charset");
- CmdArgs.push_back(Args.MakeArgString(Triple.getDefaultTextEncoding()));
+ CmdArgs.push_back(Args.MakeArgString(Triple.getDefaultNarrowTextEncoding()));
if (Arg *execEncoding = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
StringRef value = execEncoding->getValue();
llvm::ErrorOr<llvm::TextEncodingConverter> ErrorOrConverter =
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 960fbee9307f2..3d1d09272deec 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1027,7 +1027,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__clang_literal_encoding__", LangOpts.ExecEncoding);
else
Builder.defineMacro("__clang_literal_encoding__",
- TI.getTriple().getDefaultTextEncoding());
+ TI.getTriple().getDefaultNarrowTextEncoding());
if (TI.getTypeWidth(TI.getWCharType()) >= 32) {
// FIXME: 32-bit wchar_t signals UTF-32. This may change
// if -fwide-exec-charset= is ever supported.
diff --git a/clang/lib/Lex/LiteralConverter.cpp b/clang/lib/Lex/LiteralConverter.cpp
index e9f8981336e8e..ff87cf5eaad8b 100644
--- a/clang/lib/Lex/LiteralConverter.cpp
+++ b/clang/lib/Lex/LiteralConverter.cpp
@@ -13,9 +13,9 @@ using namespace llvm;
llvm::TextEncodingConverter *
LiteralConverter::getConverter(ConversionAction Action) {
- if (Action == ToSystemEncoding)
+ if (Action == CA_ToSystemEncoding)
return ToSystemEncodingConverter;
- else if (Action == ToExecEncoding)
+ else if (Action == CA_ToExecEncoding)
return ToExecEncodingConverter;
else
return nullptr;
@@ -26,7 +26,7 @@ void LiteralConverter::setConvertersFromOptions(
clang::DiagnosticsEngine &Diags) {
using namespace llvm;
InternalEncoding = "UTF-8";
- SystemEncoding = TInfo.getTriple().getDefaultTextEncoding();
+ SystemEncoding = TInfo.getTriple().getDefaultNarrowTextEncoding();
ExecEncoding =
Opts.ExecEncoding.empty() ? InternalEncoding : Opts.ExecEncoding;
// Create converter between internal and system encoding
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 982d6e0e73977..c5ef0baa7fa4e 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -147,7 +147,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
// that would have been \", which would not have been the end of string.
unsigned ResultChar = *ThisTokBuf++;
char Escape = ResultChar;
- bool Translate = true;
+ bool Transcode = true;
bool Invalid = false;
switch (ResultChar) {
// These map to themselves.
@@ -189,7 +189,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
ResultChar = 11;
break;
case 'x': { // Hex escape.
- Translate = false;
+ Transcode = false;
ResultChar = 0;
if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
Delimited = true;
@@ -253,7 +253,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
case '4': case '5': case '6': case '7': {
// Octal escapes.
--ThisTokBuf;
- Translate = false;
+ Transcode = false;
ResultChar = 0;
// Octal escapes are a series of octal digits with maximum length 3.
@@ -373,7 +373,7 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
HadError = true;
}
- if (Translate && Converter) {
+ if (Transcode && Converter) {
// Invalid escapes are written as '?' and then translated.
char ByteChar = Invalid ? '?' : ResultChar;
SmallString<8> ResultCharConv;
@@ -1834,8 +1834,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
}
llvm::TextEncodingConverter *Converter = nullptr;
- if (!isUTFLiteral(Kind) && LiteralConv)
- Converter = LiteralConv->getConverter(ToExecEncoding);
+ if (!isUTFLiteral(Kind) && !isWideLiteral(Kind) && LiteralConv)
+ Converter = LiteralConv->getConverter(CA_ToExecEncoding);
while (begin != end) {
// Is this a span of non-escape characters?
@@ -2142,7 +2142,7 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks,
SourceLocation UDSuffixTokLoc;
llvm::TextEncodingConverter *Converter = nullptr;
- if (!isUTFLiteral(Kind) && LiteralConv)
+ if (!isUTFLiteral(Kind) && !isWideLiteral(Kind) && LiteralConv)
Converter = LiteralConv->getConverter(Action);
for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
index 3e8b232d3a6ef..819baf4ed9a4c 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -229,17 +229,19 @@
// RUN: %clang -S -O20 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-O %s
// CHECK-INVALID-O: warning: optimization level '-O20' is not supported; using '-O3' instead
-// RUN: not %clang -### -S -finput-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s
-// CHECK-INVALID-INPUT-CHARSET: error: invalid value 'iso-8859-1' in '-finput-charset=iso-8859-1'
-
-// RUN: %clang -### -S -fexec-charset=invalid-charset -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s
-// CHECK-INVALID-INPUT-CHARSET: error: invalid value 'invalid-charset' in '-fexec-charset=invalid-charset'
-
-// Test that we support the following exec charsets.
-// RUN: %clang -### -S -fexec-charset=UTF-8 -o /dev/null %s 2>&1 | FileCheck --check-prefix=INVALID %s
-// RUN: %clang -### -S -fexec-charset=ISO8859-1 -o /dev/null %s 2>&1 | FileCheck --check-prefix=INVALID %s
-// RUN: %clang -### -S -fexec-charset=IBM-1047 -o /dev/null %s 2>&1 | FileCheck --check-prefix=INVALID %s
-// INVALID-NOT: error: invalid value
+// RUN: not %clang -### -S -fexec-charset=invalid-charset -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-EXEC-CHARSET %s
+// CHECK-INVALID-EXEC-CHARSET: error: invalid value 'invalid-charset' in '-fexec-charset=invalid-charset'
+// Test that we support the following exec charsets. The preferred MIME name is
+// `IBM1047`, but `IBM-1047` is the name used by z/OS USS utilities such as
+// `chtag`.
+// RUN: %clang -### -S -fexec-charset=UTF-8 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-UTF-8 %s
+// RUN: %clang -### -S -fexec-charset=ISO-8859-1 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-ISO-8859-1 %s
+// RUN: %clang -### -S -fexec-charset=IBM-1047 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-IBM-1047 %s
+// RUN: %clang -### -S -fexec-charset=IBM1047 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-IBM1047 %s
+// CHECK-EXEC-CHARSET-UTF-8: "-fexec-charset" "UTF-8"
+// CHECK-EXEC-CHARSET-ISO-8859-1: "-fexec-charset" "ISO-8859-1"
+// CHECK-EXEC-CHARSET-IBM-1047: "-fexec-charset" "IBM-1047"
+// CHECK-EXEC-CHARSET-IBM1047: "-fexec-charset" "IBM1047"
// Test that we don't error on these.
// RUN: not %clang -### -S -Werror \
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 7932c600b8d81..e5bf07d2a7ebd 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -506,8 +506,8 @@ class Triple {
/// For example, "fooos1.2.3" would return "1.2.3".
LLVM_ABI StringRef getEnvironmentVersionString() const;
- /// getDefaultTextEncoding - Get the default encoding of the triple.
- StringRef getDefaultTextEncoding() const;
+ /// getDefaultNarrowTextEncoding - Get the default encoding of the triple.
+ StringRef getDefaultNarrowTextEncoding() const;
/// @}
/// @name Convenience Predicates
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index f0945e3a6db1d..892a817f9168f 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -1418,7 +1418,7 @@ StringRef Triple::getOSAndEnvironmentName() const {
}
// Default encoding on z/OS is IBM-1047 and UTF-8 otherwise
-StringRef Triple::getDefaultTextEncoding() const {
+StringRef Triple::getDefaultNarrowTextEncoding() const {
if (getOS() == llvm::Triple::ZOS)
return "IBM-1047";
return "UTF-8";
More information about the cfe-commits mailing list