[llvm] 1527a5e - [SystemZ][z/OS] Add the functions needed for handling EBCDIC I/O

Abhina Sreeskantharajan via llvm-commits llvm-commits at lists.llvm.org
Mon May 3 05:52:44 PDT 2021


Author: Abhina Sreeskantharajan
Date: 2021-05-03T08:52:38-04:00
New Revision: 1527a5e4b4834e65678f9c30f786a2f4c17932bf

URL: https://github.com/llvm/llvm-project/commit/1527a5e4b4834e65678f9c30f786a2f4c17932bf
DIFF: https://github.com/llvm/llvm-project/commit/1527a5e4b4834e65678f9c30f786a2f4c17932bf.diff

LOG: [SystemZ][z/OS] Add the functions needed for handling EBCDIC I/O

This patch adds the basic functions needed for controlling auto conversion on z/OS.
Auto conversion to ASCII is enabled for untagged input files, on the assumption that all untagged files are EBCDIC encoded. Output files are auto converted to EBCDIC IBM-1047.
This change also enables conversion for stdin/stdout/stderr.

For more information on how fcntl controls the codepage, see https://www.ibm.com/docs/en/zos/2.4.0?topic=descriptions-fcntl-bpx1fct-bpx4fct-control-open-file-descriptors

Reviewed By: anirudhp

Differential Revision: https://reviews.llvm.org/D100483

Added: 
    llvm/include/llvm/Support/AutoConvert.h
    llvm/lib/Support/AutoConvert.cpp
    llvm/test/Support/encoding.ll

Modified: 
    llvm/lib/Support/CMakeLists.txt
    llvm/lib/Support/MemoryBuffer.cpp
    llvm/lib/Support/Unix/Path.inc

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/AutoConvert.h b/llvm/include/llvm/Support/AutoConvert.h
new file mode 100644
index 0000000000000..bcf7473feac8f
--- /dev/null
+++ b/llvm/include/llvm/Support/AutoConvert.h
@@ -0,0 +1,40 @@
+//===- AutoConvert.h - Auto conversion between ASCII/EBCDIC -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used for auto conversion between
+// ASCII/EBCDIC codepages specific to z/OS.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AUTOCONVERT_H
+#define LLVM_SUPPORT_AUTOCONVERT_H
+
+#ifdef __MVS__
+#define CCSID_IBM_1047 1047
+#define CCSID_UTF_8 1208
+#include <system_error>
+
+namespace llvm {
+
+/// Disable the z/OS enhanced ASCII auto-conversion for the file
+/// descriptor.
+///
+/// \param FD the file descriptor to operate on.
+/// \returns the outcome of the underlying fcntl() request as a
+/// std::error_code.
+std::error_code disableAutoConversion(int FD);
+
+/// Query the z/OS enhanced ASCII auto-conversion status of a file
+/// descriptor and force the conversion if the file is not tagged with a
+/// codepage.
+///
+/// \param FD the file descriptor to operate on.
+/// \returns the outcome of the underlying fcntl() requests as a
+/// std::error_code.
+std::error_code enableAutoConversion(int FD);
+
+/// Set the tag information for a file descriptor.
+///
+/// \param FD the file descriptor to tag.
+/// \param CCSID the coded character set id to store in the tag.
+/// \param Text whether the tag marks the file as text.
+/// \returns the outcome of the underlying fcntl() request as a
+/// std::error_code.
+std::error_code setFileTag(int FD, int CCSID, bool Text);
+
+} // namespace llvm
+
+#endif // __MVS__
+
+#endif // LLVM_SUPPORT_AUTOCONVERT_H

diff  --git a/llvm/lib/Support/AutoConvert.cpp b/llvm/lib/Support/AutoConvert.cpp
new file mode 100644
index 0000000000000..e35b1c83d3fce
--- /dev/null
+++ b/llvm/lib/Support/AutoConvert.cpp
@@ -0,0 +1,61 @@
+//===- AutoConvert.cpp - Auto conversion between ASCII/EBCDIC -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used for auto conversion between
+// ASCII/EBCDIC codepages specific to z/OS.
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef __MVS__
+
+#include "llvm/Support/AutoConvert.h"
+#include <cassert>
+#include <cerrno>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/// Turn off z/OS auto-conversion for \p FD.
+///
+/// Sends an F_CONTROL_CVT request carrying the SETCVTOFF command to the
+/// descriptor.
+///
+/// \param FD the file descriptor to operate on.
+/// \returns a default-constructed std::error_code when fcntl() succeeds;
+/// otherwise errno wrapped in std::generic_category().
+std::error_code llvm::disableAutoConversion(int FD) {
+  static const struct f_cnvrt Convert = {
+      SETCVTOFF,                     // cvtcmd
+      0,                             // pccsid
+      static_cast<short>(FT_BINARY), // fccsid
+  };
+  // fcntl() yields a plain int, which does not convert to std::error_code;
+  // translate a failure (-1) into the errno value it left behind.
+  if (fcntl(FD, F_CONTROL_CVT, &Convert) == -1)
+    return std::error_code(errno, std::generic_category());
+  return std::error_code();
+}
+
+/// Turn on z/OS auto-conversion for \p FD.
+///
+/// The conversion state is queried first (QUERYCVT) and the file CCSID found
+/// in the query result is reused for the SETCVTALL request; only a result of
+/// FT_UNTAGGED is replaced, by IBM-1047. The program CCSID is set to UTF-8,
+/// except for stdin, stdout and stderr, where it is passed as 0.
+///
+/// \param FD the file descriptor to operate on.
+/// \returns a default-constructed std::error_code when both fcntl() requests
+/// succeed; otherwise errno wrapped in std::generic_category().
+std::error_code llvm::enableAutoConversion(int FD) {
+  struct f_cnvrt Query = {
+      QUERYCVT, // cvtcmd
+      0,        // pccsid
+      0,        // fccsid
+  };
+
+  // A bare int does not convert to std::error_code, so failures are reported
+  // through errno rather than by returning fcntl()'s result directly.
+  if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
+    return std::error_code(errno, std::generic_category());
+
+  const bool IsStdStream =
+      FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO;
+  Query.cvtcmd = SETCVTALL;
+  Query.pccsid = IsStdStream ? 0 : CCSID_UTF_8;
+  // Assume untagged files to be IBM-1047 encoded.
+  if (Query.fccsid == FT_UNTAGGED)
+    Query.fccsid = CCSID_IBM_1047;
+
+  if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
+    return std::error_code(errno, std::generic_category());
+  return std::error_code();
+}
+
+/// Tag \p FD with \p CCSID through an fcntl(F_SETTAG) request.
+///
+/// \param FD the file descriptor to tag.
+/// \param CCSID the coded character set id stored in the tag.
+/// \param Text value of the tag's text flag; it must not be combined with
+/// FT_UNTAGGED or FT_BINARY (checked by an assertion).
+/// \returns a default-constructed std::error_code when fcntl() succeeds;
+/// otherwise errno wrapped in std::generic_category().
+std::error_code llvm::setFileTag(int FD, int CCSID, bool Text) {
+  assert((!Text || (CCSID != FT_UNTAGGED && CCSID != FT_BINARY)) &&
+         "FT_UNTAGGED and FT_BINARY are not allowed for text files");
+  struct file_tag Tag;
+  Tag.ft_ccsid = CCSID;
+  Tag.ft_txtflag = Text;
+  Tag.ft_deferred = 0;
+  Tag.ft_rsvflags = 0;
+
+  // fcntl() yields a plain int, which does not convert to std::error_code;
+  // translate a failure (-1) into the errno value it left behind.
+  if (fcntl(FD, F_SETTAG, &Tag) == -1)
+    return std::error_code(errno, std::generic_category());
+  return std::error_code();
+}
+
+#endif // __MVS__

diff  --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index ddacb4feaa0f2..2242b0ec60abc 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -93,6 +93,7 @@ add_llvm_component_library(LLVMSupport
   ARMAttributeParser.cpp
   ARMWinEH.cpp
   Allocator.cpp
+  AutoConvert.cpp
   BinaryStreamError.cpp
   BinaryStreamReader.cpp
   BinaryStreamRef.cpp

diff  --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 49524f3f6d5ed..bcf13d828a5df 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Errno.h"
 #include "llvm/Support/FileSystem.h"
@@ -467,6 +468,12 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
       return std::move(Result);
   }
 
+#ifdef __MVS__
+  // Set codepage auto-conversion for z/OS.
+  if (auto EC = llvm::enableAutoConversion(FD))
+    return EC;
+#endif
+
   auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
   if (!Buf) {
     // Failed to create a buffer. The only way it can fail is if

diff  --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index 56ed17e364e89..68e3a317f0496 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -50,6 +50,7 @@ extern char **environ;
 #elif defined(__DragonFly__)
 #include <sys/mount.h>
 #elif defined(__MVS__)
+#include "llvm/Support/AutoConvert.h"
 #include <sys/ps.h>
 #endif
 
@@ -959,8 +960,13 @@ static int nativeOpenFlags(CreationDisposition Disp, OpenFlags Flags,
     // Nothing special, just don't add O_CREAT and we get these semantics.
   }
 
+// Using append mode with z/OS UTF-8 auto-conversion results in EINVAL when
+// calling write(). Instead we need to use lseek() to set offset to EOF after
+// open().
+#ifndef __MVS__
   if (Flags & OF_Append)
     Result |= O_APPEND;
+#endif
 
 #ifdef O_CLOEXEC
   if (!(Flags & OF_ChildInherit))
@@ -989,6 +995,88 @@ std::error_code openFile(const Twine &Name, int &ResultFD,
     assert(r == 0 && "fcntl(F_SETFD, FD_CLOEXEC) failed");
   }
 #endif
+
+#ifdef __MVS__
+  /* Reason about auto-conversion and file tags. Setting the file tag only
+   * applies if file is opened in write mode:
+   *
+   * Text file:
+   *                  File exists       File created
+   * CD_CreateNew     n/a               conv: on
+   *                                    tag: set 1047
+   * CD_CreateAlways  conv: auto        conv: on
+   *                  tag: auto 1047    tag: set 1047
+   * CD_OpenAlways    conv: auto        conv: on
+   *                  tag: auto 1047    tag: set 1047
+   * CD_OpenExisting  conv: auto        n/a
+   *                  tag: unchanged
+   *
+   * Binary file:
+   *                  File exists       File created
+   * CD_CreateNew     n/a               conv: off
+   *                                    tag: set binary
+   * CD_CreateAlways  conv: off         conv: off
+   *                  tag: auto binary  tag: set binary
+   * CD_OpenAlways    conv: off         conv: off
+   *                  tag: auto binary  tag: set binary
+   * CD_OpenExisting  conv: off         n/a
+   *                  tag: unchanged
+   *
+   * Actions:
+   *   conv: off        -> auto-conversion is turned off
+   *   conv: on         -> auto-conversion is turned on
+   *   conv: auto       -> auto-conversion is turned on if the file is untagged
+   *   tag: set 1047    -> set the file tag to text encoded in 1047
+   *   tag: set binary  -> set the file tag to binary
+   *   tag: auto 1047   -> set file tag to 1047 if not set
+   *   tag: auto binary -> set file tag to binary if not set
+   *   tag: unchanged   -> do not care about the file tag
+   *
+   * It is not possible to distinguish between the cases "file exists" and
+   * "file created". In the latter case, the file tag is not set and the file
+   * size is zero. The decision table boils down to:
+   *
+   * the file tag is set if
+   *   - the file is opened for writing
+   *   - the create disposition is not equal to CD_OpenExisting
+   *   - the file tag is not set
+   *   - the file size is zero
+   *
+   * This only applies if the file is a regular file. E.g. enabling
+   * auto-conversion for reading from /dev/null results in error EINVAL when
+   * calling read().
+   *
+   * Using append mode with z/OS UTF-8 auto-conversion results in EINVAL when
+   * calling write(). Instead we need to use lseek() to set offset to EOF after
+   * open().
+   */
+  if ((Flags & OF_Append) && lseek(ResultFD, 0, SEEK_END) == -1)
+    return std::error_code(errno, std::generic_category());
+  struct stat Stat;
+  if (fstat(ResultFD, &Stat) == -1)
+    return std::error_code(errno, std::generic_category());
+  if (S_ISREG(Stat.st_mode)) {
+    bool DoSetTag = (Access & FA_Write) && (Disp != CD_OpenExisting) &&
+                    !Stat.st_tag.ft_txtflag && !Stat.st_tag.ft_ccsid &&
+                    Stat.st_size == 0;
+    if (Flags & OF_Text) {
+      if (auto EC = llvm::enableAutoConversion(ResultFD))
+        return EC;
+      if (DoSetTag) {
+        if (auto EC = llvm::setFileTag(ResultFD, CCSID_IBM_1047, true))
+          return EC;
+      }
+    } else {
+      if (auto EC = llvm::disableAutoConversion(ResultFD))
+        return EC;
+      if (DoSetTag) {
+        if (auto EC = llvm::setFileTag(ResultFD, FT_BINARY, false))
+          return EC;
+      }
+    }
+  }
+#endif
+
   return std::error_code();
 }
 

diff  --git a/llvm/test/Support/encoding.ll b/llvm/test/Support/encoding.ll
new file mode 100644
index 0000000000000..26a46b3680d73
--- /dev/null
+++ b/llvm/test/Support/encoding.ll
@@ -0,0 +1,10 @@
+; Checks if llc can deal with different char encodings.
+; This is only required for z/OS.
+;
+; UNSUPPORTED: !s390x-none-zos
+;
+; RUN: cat %s >%t && chtag -tc ISO8859-1 %t && llc %t -o - >/dev/null
+; RUN: iconv -f ISO8859-1 -t IBM-1047 <%s >%t && chtag -tc IBM-1047 %t && llc %t -o - >/dev/null
+; RUN: iconv -f ISO8859-1 -t IBM-1047 <%s >%t && chtag -r %t && llc %t -o - >/dev/null
+
+@g_105 = external dso_local global i8, align 2


        


More information about the llvm-commits mailing list