[llvm] 1527a5e - [SystemZ][z/OS] Add the functions needed for handling EBCDIC I/O
Abhina Sreeskantharajan via llvm-commits
llvm-commits at lists.llvm.org
Mon May 3 05:52:44 PDT 2021
Author: Abhina Sreeskantharajan
Date: 2021-05-03T08:52:38-04:00
New Revision: 1527a5e4b4834e65678f9c30f786a2f4c17932bf
URL: https://github.com/llvm/llvm-project/commit/1527a5e4b4834e65678f9c30f786a2f4c17932bf
DIFF: https://github.com/llvm/llvm-project/commit/1527a5e4b4834e65678f9c30f786a2f4c17932bf.diff
LOG: [SystemZ][z/OS] Add the functions needed for handling EBCDIC I/O
This patch adds the basic functions needed for controlling auto conversion on z/OS.
Auto conversion to ASCII is enabled on untagged input files, on the assumption that all untagged files are EBCDIC encoded. Output files are auto-converted to EBCDIC IBM-1047.
This change also enables conversion for stdin/stdout/stderr.
For more information on how fcntl controls the codepage, see https://www.ibm.com/docs/en/zos/2.4.0?topic=descriptions-fcntl-bpx1fct-bpx4fct-control-open-file-descriptors
Reviewed By: anirudhp
Differential Revision: https://reviews.llvm.org/D100483
Added:
llvm/include/llvm/Support/AutoConvert.h
llvm/lib/Support/AutoConvert.cpp
llvm/test/Support/encoding.ll
Modified:
llvm/lib/Support/CMakeLists.txt
llvm/lib/Support/MemoryBuffer.cpp
llvm/lib/Support/Unix/Path.inc
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/AutoConvert.h b/llvm/include/llvm/Support/AutoConvert.h
new file mode 100644
index 0000000000000..bcf7473feac8f
--- /dev/null
+++ b/llvm/include/llvm/Support/AutoConvert.h
@@ -0,0 +1,40 @@
+//===- AutoConvert.h - Auto conversion between ASCII/EBCDIC -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used for auto conversion between
+// ASCII/EBCDIC codepages specific to z/OS.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AUTOCONVERT_H
+#define LLVM_SUPPORT_AUTOCONVERT_H
+
+#ifdef __MVS__
+#define CCSID_IBM_1047 1047
+#define CCSID_UTF_8 1208
+#include <system_error>
+
+namespace llvm {
+
+/// \brief Disable the z/OS enhanced ASCII auto-conversion for the file
+/// descriptor \p FD.
+std::error_code disableAutoConversion(int FD);
+
+/// \brief Query the z/OS enhanced ASCII auto-conversion status of file
+/// descriptor \p FD and force the conversion on; a file that is not tagged
+/// with a codepage is treated as IBM-1047.
+std::error_code enableAutoConversion(int FD);
+
+/// \brief Set the file tag of descriptor \p FD to \p CCSID and \p Text.
+std::error_code setFileTag(int FD, int CCSID, bool Text);
+
+} // namespace llvm
+
+#endif // __MVS__
+
+#endif // LLVM_SUPPORT_AUTOCONVERT_H
diff --git a/llvm/lib/Support/AutoConvert.cpp b/llvm/lib/Support/AutoConvert.cpp
new file mode 100644
index 0000000000000..e35b1c83d3fce
--- /dev/null
+++ b/llvm/lib/Support/AutoConvert.cpp
@@ -0,0 +1,61 @@
+//===- AutoConvert.cpp - Auto conversion between ASCII/EBCDIC -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used for auto conversion between
+// ASCII/EBCDIC codepages specific to z/OS.
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef __MVS__
+
+#include "llvm/Support/AutoConvert.h"
+#include <fcntl.h>
+#include <sys/stat.h>
+
+std::error_code llvm::disableAutoConversion(int FD) { // Sends one F_CONTROL_CVT request for FD.
+ static const struct f_cnvrt Convert = {
+ SETCVTOFF, // cvtcmd: the "conversion off" request
+ 0, // pccsid
+ (short)FT_BINARY, // fccsid
+ };
+ return fcntl(FD, F_CONTROL_CVT, &Convert); // NOTE(review): fcntl() yields an int and std::error_code has no implicit conversion from int; likely needs std::error_code(errno, std::generic_category()) on -1 - confirm.
+}
+
+std::error_code llvm::enableAutoConversion(int FD) { // Two F_CONTROL_CVT requests: query, then set.
+ struct f_cnvrt Query = {
+ QUERYCVT, // cvtcmd: the first request only queries
+ 0, // pccsid
+ 0, // fccsid
+ };
+
+ if (fcntl(FD, F_CONTROL_CVT, &Query) == -1) // the code below reads Query.fccsid back after this call
+ return -1; // NOTE(review): int does not convert implicitly to std::error_code; likely needs std::error_code(errno, std::generic_category()) - confirm.
+
+ Query.cvtcmd = SETCVTALL; // reuse the same struct for the second, enabling request
+ Query.pccsid =
+ (FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO)
+ ? 0 // the three standard streams get pccsid 0
+ : CCSID_UTF_8; // every other descriptor gets UTF-8 (CCSID 1208)
+ // Assume untagged files to be IBM-1047 encoded; tagged files keep their CCSID.
+ Query.fccsid = (Query.fccsid == FT_UNTAGGED) ? CCSID_IBM_1047 : Query.fccsid;
+ return fcntl(FD, F_CONTROL_CVT, &Query); // NOTE(review): same int -> std::error_code concern as the early return above - confirm.
+}
+
+std::error_code llvm::setFileTag(int FD, int CCSID, bool Text) { // Builds a file_tag and applies it with F_SETTAG.
+ assert((!Text || (CCSID != FT_UNTAGGED && CCSID != FT_BINARY)) &&
+ "FT_UNTAGGED and FT_BINARY are not allowed for text files"); // NOTE(review): no <cassert> include is visible in this file - confirm assert is in scope.
+ struct file_tag Tag;
+ Tag.ft_ccsid = CCSID; // codepage recorded in the tag
+ Tag.ft_txtflag = Text; // text flag taken from the Text argument
+ Tag.ft_deferred = 0;
+ Tag.ft_rsvflags = 0;
+
+ return fcntl(FD, F_SETTAG, &Tag); // NOTE(review): fcntl() yields an int and std::error_code has no implicit conversion from int; likely needs std::error_code(errno, std::generic_category()) on -1 - confirm.
+}
+
+#endif // __MVS__
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index ddacb4feaa0f2..2242b0ec60abc 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -93,6 +93,7 @@ add_llvm_component_library(LLVMSupport
ARMAttributeParser.cpp
ARMWinEH.cpp
Allocator.cpp
+ AutoConvert.cpp
BinaryStreamError.cpp
BinaryStreamReader.cpp
BinaryStreamRef.cpp
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 49524f3f6d5ed..bcf13d828a5df 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/FileSystem.h"
@@ -467,6 +468,12 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
return std::move(Result);
}
+#ifdef __MVS__
+ // Set codepage auto-conversion for z/OS.
+ if (auto EC = llvm::enableAutoConversion(FD))
+ return EC;
+#endif
+
auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
if (!Buf) {
// Failed to create a buffer. The only way it can fail is if
diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index 56ed17e364e89..68e3a317f0496 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -50,6 +50,7 @@ extern char **environ;
#elif defined(__DragonFly__)
#include <sys/mount.h>
#elif defined(__MVS__)
+#include "llvm/Support/AutoConvert.h"
#include <sys/ps.h>
#endif
@@ -959,8 +960,13 @@ static int nativeOpenFlags(CreationDisposition Disp, OpenFlags Flags,
// Nothing special, just don't add O_CREAT and we get these semantics.
}
+// Using append mode with z/OS UTF-8 auto-conversion results in EINVAL when
+// calling write(). Instead we need to use lseek() to set offset to EOF after
+// open().
+#ifndef __MVS__
if (Flags & OF_Append)
Result |= O_APPEND;
+#endif
#ifdef O_CLOEXEC
if (!(Flags & OF_ChildInherit))
@@ -989,6 +995,88 @@ std::error_code openFile(const Twine &Name, int &ResultFD,
assert(r == 0 && "fcntl(F_SETFD, FD_CLOEXEC) failed");
}
#endif
+
+#ifdef __MVS__
+ /* Reason about auto-conversion and file tags. Setting the file tag only
+ * applies if file is opened in write mode:
+ *
+ * Text file:
+ * File exists File created
+ * CD_CreateNew n/a conv: on
+ * tag: set 1047
+ * CD_CreateAlways conv: auto conv: on
+ * tag: auto 1047 tag: set 1047
+ * CD_OpenAlways conv: auto conv: on
+ * tag: auto 1047 tag: set 1047
+ * CD_OpenExisting conv: auto n/a
+ * tag: unchanged
+ *
+ * Binary file:
+ * File exists File created
+ * CD_CreateNew n/a conv: off
+ * tag: set binary
+ * CD_CreateAlways conv: off conv: off
+ * tag: auto binary tag: set binary
+ * CD_OpenAlways conv: off conv: off
+ * tag: auto binary tag: set binary
+ * CD_OpenExisting conv: off n/a
+ * tag: unchanged
+ *
+ * Actions:
+ * conv: off -> auto-conversion is turned off
+ * conv: on -> auto-conversion is turned on
+ * conv: auto -> auto-conversion is turned on if the file is untagged
+ * tag: set 1047 -> set the file tag to text encoded in 1047
+ * tag: set binary -> set the file tag to binary
+ * tag: auto 1047 -> set file tag to 1047 if not set
+ * tag: auto binary -> set file tag to binary if not set
+ * tag: unchanged -> do not care about the file tag
+ *
+ * It is not possible to distinguish between the cases "file exists" and
+ * "file created". In the latter case, the file tag is not set and the file
+ * size is zero. The decision table boils down to:
+ *
+ * the file tag is set if
+ * - the file is opened for writing
+ * - the create disposition is not equal to CD_OpenExisting
+ * - the file tag is not set
+ * - the file size is zero
+ *
+ * This only applies if the file is a regular file. E.g. enabling
+ * auto-conversion for reading from /dev/null results in error EINVAL when
+ * calling read().
+ *
+ * Using append mode with z/OS UTF-8 auto-conversion results in EINVAL when
+ * calling write(). Instead we need to use lseek() to set offset to EOF after
+ * open().
+ */
+ if ((Flags & OF_Append) && lseek(ResultFD, 0, SEEK_END) == -1)
+ return std::error_code(errno, std::generic_category());
+ struct stat Stat;
+ if (fstat(ResultFD, &Stat) == -1)
+ return std::error_code(errno, std::generic_category());
+ if (S_ISREG(Stat.st_mode)) {
+ bool DoSetTag = (Access & FA_Write) && (Disp != CD_OpenExisting) &&
+ !Stat.st_tag.ft_txtflag && !Stat.st_tag.ft_ccsid &&
+ Stat.st_size == 0;
+ if (Flags & OF_Text) {
+ if (auto EC = llvm::enableAutoConversion(ResultFD))
+ return EC;
+ if (DoSetTag) {
+ if (auto EC = llvm::setFileTag(ResultFD, CCSID_IBM_1047, true))
+ return EC;
+ }
+ } else {
+ if (auto EC = llvm::disableAutoConversion(ResultFD))
+ return EC;
+ if (DoSetTag) {
+ if (auto EC = llvm::setFileTag(ResultFD, FT_BINARY, false))
+ return EC;
+ }
+ }
+ }
+#endif
+
return std::error_code();
}
diff --git a/llvm/test/Support/encoding.ll b/llvm/test/Support/encoding.ll
new file mode 100644
index 0000000000000..26a46b3680d73
--- /dev/null
+++ b/llvm/test/Support/encoding.ll
@@ -0,0 +1,10 @@
+; Checks if llc can deal with different char encodings.
+; This is only required for z/OS.
+;
+; UNSUPPORTED: !s390x-none-zos
+;
+; RUN: cat %s >%t && chtag -tc ISO8859-1 %t && llc %t -o - >/dev/null
+; RUN: iconv -f ISO8859-1 -t IBM-1047 <%s >%t && chtag -tc IBM-1047 %t && llc %t -o - >/dev/null
+; RUN: iconv -f ISO8859-1 -t IBM-1047 <%s >%t && chtag -r %t && llc %t -o - >/dev/null
+
+@g_105 = external dso_local global i8, align 2
More information about the llvm-commits
mailing list