[libcxx-commits] [libcxx] 4247381 - [SystemZ][z/OS] Missing wchar functions libc++
Muiez Ahmed via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Sep 13 06:44:22 PDT 2021
Author: Muiez Ahmed
Date: 2021-09-13T09:43:21-04:00
New Revision: 4247381e26dd6ec476ef9873f5b7db1e01aad2b6
URL: https://github.com/llvm/llvm-project/commit/4247381e26dd6ec476ef9873f5b7db1e01aad2b6
DIFF: https://github.com/llvm/llvm-project/commit/4247381e26dd6ec476ef9873f5b7db1e01aad2b6.diff
LOG: [SystemZ][z/OS] Missing wchar functions libc++
The aim is to add the missing z/OS specific implementations for mbsnrtowcs and wcsnrtombs, as part of libc++.
Differential Revision: https://reviews.llvm.org/D98207
Added:
libcxx/src/support/ibm/mbsnrtowcs.cpp
libcxx/src/support/ibm/wcsnrtombs.cpp
Modified:
libcxx/include/wchar.h
libcxx/src/CMakeLists.txt
Removed:
################################################################################
diff --git a/libcxx/include/wchar.h b/libcxx/include/wchar.h
index c556ae8908760..4d391f9ebd0a3 100644
--- a/libcxx/include/wchar.h
+++ b/libcxx/include/wchar.h
@@ -170,13 +170,13 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_PREFERRED_OVERLOAD
}
#endif
-#if defined(__cplusplus) && defined(_LIBCPP_MSVCRT_LIKE)
+#if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT_LIKE) || defined(__MVS__))
extern "C" {
size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
size_t nmc, size_t len, mbstate_t *__restrict ps);
size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
size_t nwc, size_t len, mbstate_t *__restrict ps);
-} // extern "C++"
-#endif // __cplusplus && _LIBCPP_MSVCRT
+} // extern "C"
+#endif // __cplusplus && (_LIBCPP_MSVCRT_LIKE || __MVS__)
#endif // _LIBCPP_WCHAR_H
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index 25994cadf4491..0bcfb1776271c 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -98,6 +98,8 @@ elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "SunOS")
)
elseif(ZOS)
list(APPEND LIBCXX_SOURCES
+ support/ibm/mbsnrtowcs.cpp
+ support/ibm/wcsnrtombs.cpp
support/ibm/xlocale_zos.cpp
)
endif()
diff --git a/libcxx/src/support/ibm/mbsnrtowcs.cpp b/libcxx/src/support/ibm/mbsnrtowcs.cpp
new file mode 100644
index 0000000000000..125bdbea1c5e0
--- /dev/null
+++ b/libcxx/src/support/ibm/mbsnrtowcs.cpp
@@ -0,0 +1,95 @@
+//===----------------------- mbsnrtowcs.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstddef> // size_t
+#include <cwchar> // mbstate_t
+#include <limits.h> // MB_LEN_MAX
+#include <string.h> // wmemcpy
+
+// Converts the multi byte sequence *`src` (of `src_size_bytes`) into wide
+// characters and returns how many of them were converted, storing at most
+// `max_dest_chars` of them in the buffer `dst`. The count returned excludes
+// the null terminator.
+// When `dst` is NULL, no characters are stored, `max_dest_chars` is ignored
+// and *`src` is left unchanged.
+// Returns (size_t) -1 when an invalid sequence is encountered.
+// When `dst` is not NULL, leaves *`src` pointing to the next character to
+// convert, or NULL if a null character was converted from *`src`.
+_LIBCPP_FUNC_VIS
+size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+                  size_t src_size_bytes, size_t max_dest_chars,
+                  mbstate_t *__restrict ps) {
+  const size_t terminated_sequence = static_cast<size_t>(0);
+  const size_t invalid_sequence = static_cast<size_t>(-1);
+  const size_t incomplete_sequence = static_cast<size_t>(-2);
+
+  size_t source_converted = 0;
+  size_t dest_converted = 0;
+  size_t result = 0;
+
+  // `max_dest_chars` only limits the conversion when there is a destination
+  // buffer; with a null `dst` the whole source is measured.
+  for (; source_converted < src_size_bytes && (!dst || dest_converted < max_dest_chars);
+       ++dest_converted, source_converted += result) {
+    size_t source_remaining = src_size_bytes - source_converted;
+
+    // mbrtowc() stores at most ONE wide character, and the loop condition
+    // guarantees that `dst` still has a free element, so the conversion can
+    // always be done in place no matter how many bytes the multi byte
+    // character occupies. No temporary buffer (and no comparison of a byte
+    // count against a wide character count) is needed.
+    wchar_t *out = dst ? dst + dest_converted : nullptr;
+
+    // Converts one multi byte character.
+    // If result is greater than 0, it's the size in bytes of that character.
+    // If result is zero, it indicates that the null character has been found.
+    // Otherwise, it's an error and errno may be set.
+    result = mbrtowc(out, *src + source_converted, source_remaining, ps);
+
+    // Don't do anything to change errno from here on.
+    // Breaking here skips the loop increment, so neither the terminating
+    // null nor a failed character is counted.
+    if (result == invalid_sequence || result == terminated_sequence || result == incomplete_sequence) {
+      break;
+    }
+  }
+
+  if (dst) {
+    if (result == terminated_sequence)
+      *src = nullptr;
+    else
+      *src += source_converted;
+  }
+  if (result == invalid_sequence)
+    return invalid_sequence;
+
+  return dest_converted;
+}
diff --git a/libcxx/src/support/ibm/wcsnrtombs.cpp b/libcxx/src/support/ibm/wcsnrtombs.cpp
new file mode 100644
index 0000000000000..3096e300d4376
--- /dev/null
+++ b/libcxx/src/support/ibm/wcsnrtombs.cpp
@@ -0,0 +1,93 @@
+//===----------------------- wcsnrtombs.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <cwchar> // mbstate_t
+#include <limits.h> // MB_LEN_MAX
+#include <stdlib.h> // MB_CUR_MAX, size_t
+#include <string.h> // memcpy
+
+// Converts at most `max_source_chars` wide characters from the buffer pointed
+// to by *`src` into the multi byte character sequence stored at `dst`, which
+// must be `dst_size_bytes` bytes in size. Returns the number of bytes in the
+// sequence converted from *`src`, excluding the null terminator.
+// Returns (size_t) -1 if an error occurs; wcrtomb() sets errno in that case.
+// If `dst` is NULL, `dst_size_bytes` is ignored, no bytes are copied to `dst`
+// and *`src` is left unchanged.
+// If `dst` is not NULL, *`src` is left pointing to the next wide character to
+// convert, or set to NULL if a null wide character was converted.
+_LIBCPP_FUNC_VIS
+size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
+                  size_t max_source_chars, size_t dst_size_bytes,
+                  mbstate_t *__restrict ps) {
+
+  const size_t invalid_wchar = static_cast<size_t>(-1);
+
+  size_t source_converted = 0;
+  size_t dest_converted = 0;
+  size_t result = 0;
+
+  // `dst_size_bytes` only limits the conversion when there is a destination
+  // buffer; with a null `dst` the whole source is measured.
+  for (; source_converted < max_source_chars && (!dst || dest_converted < dst_size_bytes);
+       ++source_converted, dest_converted += result) {
+    wchar_t c = (*src)[source_converted];
+
+    if (dst == nullptr) {
+      // Only the length is wanted. wcrtomb(NULL, c, ps) cannot be used for
+      // that: with a null output pointer it behaves as wcrtomb(buf, L'\0', ps)
+      // and ignores `c`, so convert into a scratch buffer instead.
+      char scratch[MB_LEN_MAX];
+      result = wcrtomb(scratch, c, ps);
+    } else {
+      size_t dest_remaining = dst_size_bytes - dest_converted;
+
+      if (dest_remaining >= static_cast<size_t>(MB_CUR_MAX)) {
+        // dst has enough space to translate in-place.
+        result = wcrtomb(dst + dest_converted, c, ps);
+      } else {
+        /*
+         * dst may not have enough space, so use a temporary buffer.
+         *
+         * We need to save a copy of the conversion state
+         * here so we can restore it if the multibyte
+         * character is too long for the buffer.
+         */
+        char buff[MB_LEN_MAX];
+        mbstate_t mbstate_tmp;
+
+        if (ps != nullptr)
+          mbstate_tmp = *ps;
+        result = wcrtomb(buff, c, ps);
+
+        if (result > dest_remaining) {
+          // Multi-byte sequence for character won't fit (or the conversion
+          // failed, since (size_t) -1 also compares greater).
+          if (ps != nullptr)
+            *ps = mbstate_tmp;
+          if (result != invalid_wchar)
+            break;
+        } else {
+          // The buffer was used, so we need to copy the translation to the
+          // current write position in dst, after the bytes already stored.
+          memcpy(dst + dest_converted, buff, result);
+        }
+      }
+    }
+
+    // result contains the size of the multi-byte-sequence converted.
+    // Otherwise, result is (size_t) -1 and wcrtomb() sets the errno.
+    if (result == invalid_wchar) {
+      if (dst)
+        *src = *src + source_converted;
+      return invalid_wchar;
+    }
+
+    // Returning before the loop increment keeps the terminator's bytes out
+    // of the returned count.
+    if (c == L'\0') {
+      if (dst)
+        *src = nullptr;
+      return dest_converted;
+    }
+  }
+
+  if (dst)
+    *src = *src + source_converted;
+
+  return dest_converted;
+}
More information about the libcxx-commits
mailing list