[llvm-branch-commits] [clang] [llvm] Enable driver changes for fexec-charset (PR #195890)
Abhina Sree via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri May 22 06:06:57 PDT 2026
https://github.com/abhina-sree updated https://github.com/llvm/llvm-project/pull/195890
>From 95f9098c3b80e8ee4f2532f8f9feeb798014e0e2 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Fri, 8 May 2026 12:21:38 -0400
Subject: [PATCH] Enable driver changes for fexec-charset
---
clang/docs/LanguageExtensions.rst | 6 +++---
.../clang/Basic/DiagnosticDriverKinds.td | 1 +
clang/include/clang/Options/Options.td | 18 +++++++++++++----
clang/lib/Driver/ToolChains/Clang.cpp | 20 +++++++++++++------
clang/test/CodeGen/systemz-charset.c | 1 +
clang/test/Driver/cl-options.c | 7 ++++---
clang/test/Driver/clang_f_opts.c | 14 ++++++++++---
llvm/include/llvm/Support/TextEncoding.h | 2 ++
llvm/lib/Support/TextEncoding.cpp | 10 ++++++++++
9 files changed, 60 insertions(+), 19 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 03cb02deb5e7f..f2aca70d9d57d 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -421,9 +421,9 @@ Builtin Macros
``__clang_literal_encoding__``
Defined to a narrow string literal that represents the current encoding of
- narrow string literals, e.g., ``"hello"``. This macro typically expands to
- "UTF-8" (but may change in the future if the
- ``-fexec-charset="Encoding-Name"`` option is implemented.)
+ narrow string literals, e.g., ``"hello"``. This macro expands to the text
+ encoding specified by ``-fexec-charset`` if any, or a system-specific default
+ otherwise: ``"IBM-1047"`` on z/OS and ``"UTF-8"`` on all other systems.
``__clang_wide_literal_encoding__``
Defined to a narrow string literal that represents the current encoding of
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 114ee475c371f..16460583b3770 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -149,6 +149,7 @@ def warn_drv_unsupported_option_part_for_target : Warning<
InGroup<OptionIgnored>;
def err_drv_unsupported_option_part_for_target : Error<
"'%0' in '%1' option is not currently supported for target '%2'">;
+def err_drv_unsupported_encoding : Error<"'%0' is not a supported encoding">;
def warn_drv_invalid_argument_for_flang : Warning<
"'%0' is not valid for Fortran">,
InGroup<OptionIgnored>;
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 73bce00b921ea..95163962da647 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -3152,7 +3152,13 @@ def fexperimental_strict_floating_point : Flag<["-"], "fexperimental-strict-floa
def finput_charset_EQ : Joined<["-"], "finput-charset=">,
Visibility<[ClangOption, FlangOption, FC1Option]>, Group<f_Group>,
HelpText<"Specify the default character set for source files">;
-def fexec_charset_EQ : Joined<["-"], "fexec-charset=">, Group<f_Group>;
+def fexec_charset_EQ
+ : Joined<["-"], "fexec-charset=">,
+ Group<f_Group>,
+ HelpText<
+ "Set the execution <encoding> for string and character literals. "
+ "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, "
+ "and possibly those supported by ICU or the host iconv library.">;
def finstrument_functions
: Flag<["-"], "finstrument-functions">,
Group<f_Group>,
@@ -9163,9 +9169,13 @@ def _SLASH_showFilenames_ : CLFlag<"showFilenames-">,
def _SLASH_source_charset : CLCompileJoined<"source-charset:">,
HelpText<"Set source encoding, supports only UTF-8">,
Alias<finput_charset_EQ>;
-def _SLASH_execution_charset : CLCompileJoined<"execution-charset:">,
- HelpText<"Set runtime encoding, supports only UTF-8">,
- Alias<fexec_charset_EQ>;
+def _SLASH_execution_charset
+ : CLCompileJoined<"execution-charset:">,
+ HelpText<
+ "Set the execution <encoding> for string and character literals. "
+ "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, "
+ "and possibly those supported by ICU or the host iconv library.">,
+ Alias<fexec_charset_EQ>;
def _SLASH_std : CLCompileJoined<"std:">,
HelpText<"Set language version (c++14,c++17,c++20,c++23preview,c++latest,c11,c17)">;
def _SLASH_U : CLJoinedOrSeparate<"U">, HelpText<"Undefine macro">,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index bdffa4fdd7e6b..fe78b60096aa3 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -52,6 +52,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/TextEncoding.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/TargetParser/ARMTargetParserCommon.h"
@@ -7755,12 +7756,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
<< value;
}
- // -fexec_charset=UTF-8 is default. Reject others
- if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
- StringRef value = execCharset->getValue();
- if (!value.equals_insensitive("utf-8"))
- D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args)
- << value;
+ if (Arg *ExecEncoding = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
+ StringRef Value = ExecEncoding->getValue();
+ if (llvm::TextEncodingConverter::isEncodingSupported(Value)) {
+ CmdArgs.push_back("-fexec-charset");
+ CmdArgs.push_back(Args.MakeArgString(Value));
+ } else {
+ D.Diag(diag::err_drv_unsupported_encoding) << Value;
+ }
+ } else {
+ // No -fexec-charset given: default to the target's narrow text encoding.
+ CmdArgs.push_back("-fexec-charset");
+ CmdArgs.push_back(
+ Args.MakeArgString(Triple.getDefaultNarrowTextEncoding()));
}
RenderDiagnosticsOptions(D, Args, CmdArgs);
diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c
index 78ae3353224af..dbb36aed49990 100644
--- a/clang/test/CodeGen/systemz-charset.c
+++ b/clang/test/CodeGen/systemz-charset.c
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset IBM-1047 -o - | FileCheck %s
+// RUN: %clang %s -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s
// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset UTF-8 -DIBM1047_ONLY=1 -o - | FileCheck %s --check-prefix=CHECK-UTF8
int printf(char const *, ...);
diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c
index c0f57ae768252..1a2827012549d 100644
--- a/clang/test/Driver/cl-options.c
+++ b/clang/test/Driver/cl-options.c
@@ -250,10 +250,11 @@
// RUN: not %clang_cl /source-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=source-charset-utf-16 %s
// source-charset-utf-16: invalid value 'utf-16' in '/source-charset:utf-16'
-// /execution-charset: should warn on everything except UTF-8.
-// RUN: not %clang_cl /execution-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-utf-16 %s
-// execution-charset-utf-16: invalid value 'utf-16' in '/execution-charset:utf-16'
+// /execution-charset: should error on unsupported charsets.
+// RUN: not %clang_cl /execution-charset:invalid-charset -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-invalid %s
+// execution-charset-invalid: 'invalid-charset' is not a supported encoding
//
+
// RUN: %clang_cl /Umymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s
// RUN: %clang_cl /U mymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s
// U: "-U" "mymacro"
diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
index 5871f1580d6b7..7fe67068118fc 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -232,8 +232,16 @@
// RUN: not %clang -### -S -finput-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s
// CHECK-INVALID-INPUT-CHARSET: error: invalid value 'iso-8859-1' in '-finput-charset=iso-8859-1'
-// RUN: not %clang -### -S -fexec-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-EXEC-CHARSET %s
-// CHECK-INVALID-EXEC-CHARSET: error: invalid value 'iso-8859-1' in '-fexec-charset=iso-8859-1'
+// RUN: not %clang -### -S -fexec-charset=invalid-charset -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-EXEC-CHARSET %s
+// CHECK-INVALID-EXEC-CHARSET: error: 'invalid-charset' is not a supported encoding
+
+// Test that we support the following exec charsets. The preferred MIME name is
+// `IBM1047`, but `IBM-1047` is the name used by z/OS USS utilities such as
+// `chtag`.
+// RUN: %clang -### -S -fexec-charset=UTF-8 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-UTF-8 %s
+// RUN: %clang -### -S -fexec-charset=IBM-1047 -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-EXEC-CHARSET-IBM-1047 %s
+// CHECK-EXEC-CHARSET-UTF-8: "-fexec-charset" "UTF-8"
+// CHECK-EXEC-CHARSET-IBM-1047: "-fexec-charset" "IBM-1047"
// Test that we don't error on these.
// RUN: not %clang -### -S -Werror \
@@ -247,7 +255,7 @@
// RUN: -fident -fno-ident \
// RUN: -fimplicit-templates -fno-implicit-templates \
// RUN: -finput-charset=UTF-8 \
-// RUN: -fexec-charset=UTF-8 \
+// RUN: -fexec-charset=UTF-8 \
// RUN: -fivopts -fno-ivopts \
// RUN: -fnon-call-exceptions -fno-non-call-exceptions \
// RUN: -fpermissive -fno-permissive \
diff --git a/llvm/include/llvm/Support/TextEncoding.h b/llvm/include/llvm/Support/TextEncoding.h
index 8f5a6122ede45..c120e36d1de6c 100644
--- a/llvm/include/llvm/Support/TextEncoding.h
+++ b/llvm/include/llvm/Support/TextEncoding.h
@@ -145,6 +145,8 @@ class TextEncodingConverter {
return Result[0];
return '\0';
}
+
+ LLVM_ABI static bool isEncodingSupported(StringRef Name);
};
} // namespace llvm
diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp
index 5c1d9696686a2..475799df9070b 100644
--- a/llvm/lib/Support/TextEncoding.cpp
+++ b/llvm/lib/Support/TextEncoding.cpp
@@ -58,6 +58,16 @@ static std::optional<TextEncoding> getKnownEncoding(StringRef Name) {
return std::nullopt;
}
+bool TextEncodingConverter::isEncodingSupported(StringRef Name) {
+  // Names recognized by getKnownEncoding() need no converter probe.
+  if (getKnownEncoding(Name))
+    return true;
+  // Otherwise Name is supported iff a UTF-8 -> Name converter can be created.
+  // Pass the StringRef itself: Name.data() is not guaranteed to be
+  // null-terminated, so rebuilding a StringRef from it may read out of bounds.
+  return static_cast<bool>(TextEncodingConverter::create("UTF-8", Name));
+}
+
[[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output,
size_t &OutputLength,
SmallVectorImpl<char> &Result) {
More information about the llvm-branch-commits
mailing list