[clang] [llvm] [SystemZ][z/OS] Support both EBCDIC & ASCII form of type_info::name() (PR #179687)
Abhina Sree via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 4 07:28:04 PST 2026
https://github.com/abhina-sree created https://github.com/llvm/llvm-project/pull/179687
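On z/OS, the type name string returned by type_info::name() is stored in two encodings: the EBCDIC form (EBCDIC is the default system encoding), followed by a null terminator and then the ASCII form.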
From 49bac51932ff0a0af4e8066614f63b6427ff05b9 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Wed, 4 Feb 2026 10:24:38 -0500
Subject: [PATCH] Support both EBCDIC & ASCII form of type_info::name()
---
clang/lib/CodeGen/ItaniumCXXABI.cpp | 17 +++++++++++++++--
clang/test/CodeGenCXX/zos-typename.cpp | 14 ++++++++++++++
llvm/include/llvm/TargetParser/Triple.h | 4 ++++
llvm/lib/TargetParser/Triple.cpp | 7 +++++++
4 files changed, 40 insertions(+), 2 deletions(-)
create mode 100644 clang/test/CodeGenCXX/zos-typename.cpp
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index a6c80cd083bb8..07ff8da12004c 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/TextEncoding.h"
#include <optional>
@@ -3619,8 +3620,20 @@ llvm::GlobalVariable *ItaniumRTTIBuilder::GetAddrOfTypeName(
// We know that the mangled name of the type starts at index 4 of the
// mangled name of the typename, so we can just index into it in order to
// get the mangled name of the type.
- llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext,
- Name.substr(4));
+ llvm::Constant *Init;
+ if (CGM.getTriple().isOSzOS()) {
+ // On z/OS, typename is stored as 2 encodings: EBCDIC followed by ASCII.
+ SmallString<256> DualEncodedName;
+ llvm::ErrorOr<llvm::TextEncodingConverter> Converter =
+ llvm::TextEncodingConverter::create(
+ "UTF-8", CGM.getTriple().getDefaultNarrowTextEncoding());
+ Converter->convert(Name.substr(4), DualEncodedName);
+ DualEncodedName += '\0';
+ DualEncodedName += Name.substr(4);
+ Init = llvm::ConstantDataArray::getString(VMContext, DualEncodedName);
+ } else
+ Init = llvm::ConstantDataArray::getString(VMContext, Name.substr(4));
+
auto Align = CGM.getContext().getTypeAlignInChars(CGM.getContext().CharTy);
llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
diff --git a/clang/test/CodeGenCXX/zos-typename.cpp b/clang/test/CodeGenCXX/zos-typename.cpp
new file mode 100644
index 0000000000000..dabb9cc25ae60
--- /dev/null
+++ b/clang/test/CodeGenCXX/zos-typename.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang -S -emit-llvm -target s390x-none-zos -I%S -fexec-charset=UTF-8 %s -o - | FileCheck %s
+// RUN: %clang -S -emit-llvm -target s390x-none-zos -I%S %s -o - | FileCheck %s
+// RUN: %clang -S -emit-llvm -target s390x-none-zos -I%S -m32 %s -o - | FileCheck %s
+
+#include <typeinfo>
+
+class TestClass {};
+struct TestStruct {};
+
+const char *A = typeid(TestClass).name();
+const char *B = typeid(TestStruct).name();
+
+// CHECK: @_ZTS9TestClass = {{.*}} c"\F9\E3\85\A2\A3\C3\93\81\A2\A2\009TestClass\00"
+// CHECK: @_ZTS10TestStruct = {{.*}} c"\F1\F0\E3\85\A2\A3\E2\A3\99\A4\83\A3\0010TestStruct\00"
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 8559d7b088ee1..463323e379765 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -506,6 +506,10 @@ class Triple {
/// string (separated by a '-' if the environment component is present).
LLVM_ABI StringRef getOSAndEnvironmentName() const;
+ /// Get the default system encoding of the triple.
+ /// For example, "IBM-1047" for z/OS and "UTF-8" for all other targets.
+ LLVM_ABI StringRef getDefaultNarrowTextEncoding() const;
+
/// Get the version component of the environment component as a single
/// string (the version after the environment).
///
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index e6a9eedab5954..8a094b2f2d9ec 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -1437,6 +1437,13 @@ StringRef Triple::getOSAndEnvironmentName() const {
return Tmp.split('-').second; // Strip second component
}
+// The default narrow text encoding is IBM-1047 on z/OS and UTF-8 on every other OS.
+StringRef Triple::getDefaultNarrowTextEncoding() const {
+ if (getOS() == llvm::Triple::ZOS)
+ return "IBM-1047";
+ return "UTF-8";
+}
+
static VersionTuple parseVersionFromName(StringRef Name) {
VersionTuple Version;
Version.tryParse(Name);
More information about the cfe-commits
mailing list