[clang] [llvm] [SystemZ][z/OS] Complete EBCDIC I/O support (PR #75212)

Abhina Sree via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 12 11:04:27 PST 2023


https://github.com/abhina-sree updated https://github.com/llvm/llvm-project/pull/75212

>From 7bac35e24c3de0ed8370055bc4e4f1ea27d8f694 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan at ibm.com>
Date: Tue, 12 Dec 2023 11:12:50 -0500
Subject: [PATCH 1/2] Continue adding EBCDIC I/O support

---
 clang/tools/c-arcmt-test/c-arcmt-test.c | 11 +++-
 clang/tools/c-index-test/c-index-test.c |  9 +++
 llvm/include/llvm/Support/AutoConvert.h | 25 ++++++++-
 llvm/lib/Support/AutoConvert.cpp        | 73 +++++++++++++++++++++----
 llvm/lib/Support/InitLLVM.cpp           | 41 +++++++++++++-
 llvm/lib/Support/Unix/Program.inc       |  5 ++
 llvm/lib/Support/raw_ostream.cpp        |  9 +++
 llvm/utils/count/CMakeLists.txt         |  4 ++
 llvm/utils/count/count.c                | 10 +++-
 9 files changed, 170 insertions(+), 17 deletions(-)

diff --git a/clang/tools/c-arcmt-test/c-arcmt-test.c b/clang/tools/c-arcmt-test/c-arcmt-test.c
index 3bbb2d5d6a856..00999f188c7dc 100644
--- a/clang/tools/c-arcmt-test/c-arcmt-test.c
+++ b/clang/tools/c-arcmt-test/c-arcmt-test.c
@@ -1,8 +1,9 @@
 /* c-arcmt-test.c */
 
 #include "clang-c/Index.h"
-#include <stdlib.h>
+#include "llvm/Support/AutoConvert.h"
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #if defined(_WIN32)
 #include <io.h>
@@ -107,6 +108,14 @@ static void flush_atexit(void) {
 }
 
 int main(int argc, const char **argv) {
+#ifdef __MVS__
+  if (enableAutoConversion(fileno(stdout)) == -1)
+    fprintf(stderr, "Setting conversion on stdout failed\n");
+
+  if (enableAutoConversion(fileno(stderr)) == -1)
+    fprintf(stderr, "Setting conversion on stderr failed\n");
+#endif
+
   thread_info client_data;
 
   atexit(flush_atexit);
diff --git a/clang/tools/c-index-test/c-index-test.c b/clang/tools/c-index-test/c-index-test.c
index 2c0c9cb8eb5e4..6fa400a0675b7 100644
--- a/clang/tools/c-index-test/c-index-test.c
+++ b/clang/tools/c-index-test/c-index-test.c
@@ -8,6 +8,7 @@
 #include "clang-c/Documentation.h"
 #include "clang-c/Index.h"
 #include "clang/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
@@ -5150,6 +5151,14 @@ static void flush_atexit(void) {
 int main(int argc, const char **argv) {
   thread_info client_data;
 
+#ifdef __MVS__
+  if (enableAutoConversion(fileno(stdout)) == -1)
+    fprintf(stderr, "Setting conversion on stdout failed\n");
+
+  if (enableAutoConversion(fileno(stderr)) == -1)
+    fprintf(stderr, "Setting conversion on stderr failed\n");
+#endif
+
   atexit(flush_atexit);
 
 #ifdef CLANG_HAVE_LIBXML
diff --git a/llvm/include/llvm/Support/AutoConvert.h b/llvm/include/llvm/Support/AutoConvert.h
index bcf7473feac8f..6608dd461d726 100644
--- a/llvm/include/llvm/Support/AutoConvert.h
+++ b/llvm/include/llvm/Support/AutoConvert.h
@@ -15,10 +15,27 @@
 #define LLVM_SUPPORT_AUTOCONVERT_H
 
 #ifdef __MVS__
-#define CCSID_IBM_1047 1047
-#define CCSID_UTF_8 1208
+#include <_Ccsid.h>
+#ifdef __cplusplus
 #include <system_error>
+#endif // __cplusplus
 
+#define CCSID_IBM_1047 1047
+#define CCSID_UTF_8 1208
+#define CCSID_ISO8859_1 819
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+int enableAutoConversion(int FD);
+int disableAutoConversion(int FD);
+int restoreStdHandleAutoConversion(int FD);
+int overrideAutoConversion(int FD, char *Filetag);
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#ifdef __cplusplus
 namespace llvm {
 
 /// \brief Disable the z/OS enhanced ASCII auto-conversion for the file
@@ -30,10 +47,14 @@ std::error_code disableAutoConversion(int FD);
 /// codepage.
 std::error_code enableAutoConversion(int FD);
 
+/// Restore the z/OS enhanced ASCII auto-conversion for the std handle.
+std::error_code restoreStdHandleAutoConversion(int FD);
+
 /// \brief Set the tag information for a file descriptor.
 std::error_code setFileTag(int FD, int CCSID, bool Text);
 
 } // namespace llvm
+#endif // __cplusplus
 
 #endif // __MVS__
 
diff --git a/llvm/lib/Support/AutoConvert.cpp b/llvm/lib/Support/AutoConvert.cpp
index 4fb7e242c3480..8170e553ac6e1 100644
--- a/llvm/lib/Support/AutoConvert.cpp
+++ b/llvm/lib/Support/AutoConvert.cpp
@@ -14,21 +14,36 @@
 #ifdef __MVS__
 
 #include "llvm/Support/AutoConvert.h"
+#include <cassert>
 #include <fcntl.h>
 #include <sys/stat.h>
+#include <unistd.h>
 
-std::error_code llvm::disableAutoConversion(int FD) {
+static int savedStdHandleAutoConversionMode[3] = {-1, -1, -1};
+
+int disableAutoConversion(int FD) {
   static const struct f_cnvrt Convert = {
-      SETCVTOFF,        // cvtcmd
-      0,                // pccsid
-      (short)FT_BINARY, // fccsid
+      SETCVTOFF, // cvtcmd
+      0,         // pccsid
+      0,         // fccsid
   };
-  if (fcntl(FD, F_CONTROL_CVT, &Convert) == -1)
-    return std::error_code(errno, std::generic_category());
-  return std::error_code();
+
+  return fcntl(FD, F_CONTROL_CVT, &Convert);
 }
 
-std::error_code llvm::enableAutoConversion(int FD) {
+int restoreStdHandleAutoConversion(int FD) {
+  assert(FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO);
+  if (savedStdHandleAutoConversionMode[FD] == -1)
+    return 0;
+  struct f_cnvrt Cvt = {
+      savedStdHandleAutoConversionMode[FD], // cvtcmd
+      0,                                    // pccsid
+      0,                                    // fccsid
+  };
+  return (fcntl(FD, F_CONTROL_CVT, &Cvt));
+}
+
+int enableAutoConversion(int FD) {
   struct f_cnvrt Query = {
       QUERYCVT, // cvtcmd
       0,        // pccsid
@@ -36,17 +51,53 @@ std::error_code llvm::enableAutoConversion(int FD) {
   };
 
   if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
-    return std::error_code(errno, std::generic_category());
+    return -1;
+
+  // We don't need conversion for UTF-8 tagged files.
+  // TODO: Remove the assumption of ISO8859-1 = UTF-8 here when we fully resolve
+  // problems related to UTF-8 tagged source files.
+  // When the pccsid is not ISO8859-1, autoconversion is still needed.
+  if (Query.pccsid == CCSID_ISO8859_1 &&
+      (Query.fccsid == CCSID_UTF_8 || Query.fccsid == CCSID_ISO8859_1))
+    return 0;
+
+  // Save the state of std handles before we make changes to it.
+  if ((FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO) &&
+      savedStdHandleAutoConversionMode[FD] == -1)
+    savedStdHandleAutoConversionMode[FD] = Query.cvtcmd;
+
+  if (FD == STDOUT_FILENO || FD == STDERR_FILENO)
+    Query.cvtcmd = SETCVTON;
+  else
+    Query.cvtcmd = SETCVTALL;
 
-  Query.cvtcmd = SETCVTALL;
   Query.pccsid =
       (FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO)
           ? 0
           : CCSID_UTF_8;
   // Assume untagged files to be IBM-1047 encoded.
   Query.fccsid = (Query.fccsid == FT_UNTAGGED) ? CCSID_IBM_1047 : Query.fccsid;
-  if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
+  return fcntl(FD, F_CONTROL_CVT, &Query);
+}
+
+std::error_code llvm::disableAutoConversion(int FD) {
+  if (::disableAutoConversion(FD) == -1)
+    return std::error_code(errno, std::generic_category());
+
+  return std::error_code();
+}
+
+std::error_code llvm::enableAutoConversion(int FD) {
+  if (::enableAutoConversion(FD) == -1)
     return std::error_code(errno, std::generic_category());
+
+  return std::error_code();
+}
+
+std::error_code llvm::restoreStdHandleAutoConversion(int FD) {
+  if (::restoreStdHandleAutoConversion(FD) == -1)
+    return std::error_code(errno, std::generic_category());
+
   return std::error_code();
 }
 
diff --git a/llvm/lib/Support/InitLLVM.cpp b/llvm/lib/Support/InitLLVM.cpp
index 2b7173b289403..7f475f42f3cb8 100644
--- a/llvm/lib/Support/InitLLVM.cpp
+++ b/llvm/lib/Support/InitLLVM.cpp
@@ -8,6 +8,8 @@
 
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/AutoConvert.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
@@ -15,15 +17,31 @@
 #include "llvm/Support/SwapByteOrder.h"
 
 #ifdef _WIN32
-#include "llvm/Support/Error.h"
 #include "llvm/Support/Windows/WindowsSupport.h"
 #endif
 
+#ifdef __MVS__
+#include <unistd.h>
+
+void CleanupStdHandles(void *Cookie) {
+  llvm::raw_ostream *Outs = &llvm::outs(), *Errs = &llvm::errs();
+  Outs->flush();
+  Errs->flush();
+  llvm::restoreStdHandleAutoConversion(STDIN_FILENO);
+  llvm::restoreStdHandleAutoConversion(STDOUT_FILENO);
+  llvm::restoreStdHandleAutoConversion(STDERR_FILENO);
+}
+#endif
+
 using namespace llvm;
 using namespace llvm::sys;
 
 InitLLVM::InitLLVM(int &Argc, const char **&Argv,
                    bool InstallPipeSignalExitHandler) {
+#ifdef __MVS__
+  // Restore the std handles' saved auto-conversion state if a signal arrives.
+  sys::AddSignalHandler(CleanupStdHandles, nullptr);
+#endif
   if (InstallPipeSignalExitHandler)
     // The pipe signal handler must be installed before any other handlers are
     // registered. This is because the Unix \ref RegisterHandlers function does
@@ -37,6 +55,20 @@ InitLLVM::InitLLVM(int &Argc, const char **&Argv,
   sys::PrintStackTraceOnErrorSignal(Argv[0]);
   install_out_of_memory_new_handler();
 
+#ifdef __MVS__
+
+  // We use UTF-8 as the internal character encoding. On z/OS, all external
+  // output is encoded in EBCDIC. In order to be able to read all messages,
+  // we turn conversion to EBCDIC on for the stderr and stdout fds.
+  std::string Banner = std::string(Argv[0]) + ": ";
+  ExitOnError ExitOnErr(Banner);
+
+  // If turning on conversion for stderr fails then the error message
+  // may be garbled. There is no solution to this problem.
+  ExitOnErr(errorCodeToError(llvm::enableAutoConversion(STDERR_FILENO)));
+  ExitOnErr(errorCodeToError(llvm::enableAutoConversion(STDOUT_FILENO)));
+#endif
+
 #ifdef _WIN32
   // We use UTF-8 as the internal character encoding. On Windows,
   // arguments passed to main() may not be encoded in UTF-8. In order
@@ -61,4 +93,9 @@ InitLLVM::InitLLVM(int &Argc, const char **&Argv,
 #endif
 }
 
-InitLLVM::~InitLLVM() { llvm_shutdown(); }
+InitLLVM::~InitLLVM() {
+#ifdef __MVS__
+  CleanupStdHandles(nullptr);
+#endif
+  llvm_shutdown();
+}
diff --git a/llvm/lib/Support/Unix/Program.inc b/llvm/lib/Support/Unix/Program.inc
index 9466d0f0ba859..2e17d8c4ea3da 100644
--- a/llvm/lib/Support/Unix/Program.inc
+++ b/llvm/lib/Support/Unix/Program.inc
@@ -20,6 +20,7 @@
 #include "Unix.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
@@ -520,8 +521,12 @@ std::error_code llvm::sys::ChangeStdoutMode(fs::OpenFlags Flags) {
 }
 
 std::error_code llvm::sys::ChangeStdinToBinary() {
+#ifdef __MVS__
+  return disableAutoConversion(STDIN_FILENO);
+#else
   // Do nothing, as Unix doesn't differentiate between text and binary.
   return std::error_code();
+#endif
 }
 
 std::error_code llvm::sys::ChangeStdoutToBinary() {
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 8908e7b6a150c..d654ae450b340 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Duration.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -895,6 +896,10 @@ void raw_fd_ostream::anchor() {}
 raw_fd_ostream &llvm::outs() {
   // Set buffer settings to model stdout behavior.
   std::error_code EC;
+#ifdef __MVS__
+  EC = enableAutoConversion(STDOUT_FILENO);
+  assert(!EC);
+#endif
   static raw_fd_ostream S("-", EC, sys::fs::OF_None);
   assert(!EC);
   return S;
@@ -902,6 +907,10 @@ raw_fd_ostream &llvm::outs() {
 
 raw_fd_ostream &llvm::errs() {
   // Set standard error to be unbuffered and tied to outs() by default.
+#ifdef __MVS__
+  std::error_code EC = enableAutoConversion(STDERR_FILENO);
+  assert(!EC);
+#endif
   static raw_fd_ostream S(STDERR_FILENO, false, true);
   return S;
 }
diff --git a/llvm/utils/count/CMakeLists.txt b/llvm/utils/count/CMakeLists.txt
index 4e0d371334e47..cfd1f4a85d8a1 100644
--- a/llvm/utils/count/CMakeLists.txt
+++ b/llvm/utils/count/CMakeLists.txt
@@ -1,3 +1,7 @@
+set(LLVM_LINK_COMPONENTS
+  support
+)
+
 add_llvm_utility(count
   count.c
   )
diff --git a/llvm/utils/count/count.c b/llvm/utils/count/count.c
index 7149c14a63abb..300be2aa8a18e 100644
--- a/llvm/utils/count/count.c
+++ b/llvm/utils/count/count.c
@@ -6,10 +6,18 @@
  *
 \*===----------------------------------------------------------------------===*/
 
-#include <stdlib.h>
+#include "llvm/Support/AutoConvert.h"
 #include <stdio.h>
+#include <stdlib.h>
 
 int main(int argc, char **argv) {
+#ifdef __MVS__
+  if (enableAutoConversion(fileno(stdin)) == -1)
+    fprintf(stderr, "Setting conversion on stdin failed\n");
+
+  if (enableAutoConversion(fileno(stderr)) == -1)
+    fprintf(stderr, "Setting conversion on stderr failed\n");
+#endif
   size_t Count, NumLines, NumRead;
   char Buffer[4096], *End;
 



More information about the llvm-commits mailing list