[libc-commits] [libc] 46b5087 - [libc] add basic wide char functions

Michael Jones via libc-commits libc-commits at lists.llvm.org
Mon Mar 20 16:36:26 PDT 2023


Author: Michael Jones
Date: 2023-03-20T16:36:21-07:00
New Revision: 46b5087227246c9f89569af3a96126d85c87bbc4

URL: https://github.com/llvm/llvm-project/commit/46b5087227246c9f89569af3a96126d85c87bbc4
DIFF: https://github.com/llvm/llvm-project/commit/46b5087227246c9f89569af3a96126d85c87bbc4.diff

LOG: [libc] add basic wide char functions

This patch adds the wchar.h header, as well as the functions that convert
between single bytes and wide characters (btowc and wctob). The header also
sets up the definitions of wint_t and wchar_t.

Reviewed By: lntue

Differential Revision: https://reviews.llvm.org/D145995

Added: 
    libc/include/llvm-libc-macros/wchar-macros.h
    libc/include/llvm-libc-types/wchar_t.h
    libc/include/llvm-libc-types/wint_t.h
    libc/include/wchar.h.def
    libc/src/__support/wctype_utils.h
    libc/src/wchar/CMakeLists.txt
    libc/src/wchar/btowc.cpp
    libc/src/wchar/btowc.h
    libc/src/wchar/wctob.cpp
    libc/src/wchar/wctob.h
    libc/test/src/wchar/CMakeLists.txt
    libc/test/src/wchar/btowc_test.cpp
    libc/test/src/wchar/wctob_test.cpp

Modified: 
    libc/config/linux/api.td
    libc/config/linux/x86_64/entrypoints.txt
    libc/config/linux/x86_64/headers.txt
    libc/include/CMakeLists.txt
    libc/include/llvm-libc-macros/CMakeLists.txt
    libc/include/llvm-libc-types/CMakeLists.txt
    libc/include/llvm-libc-types/size_t.h
    libc/spec/spec.td
    libc/spec/stdc.td
    libc/src/CMakeLists.txt
    libc/src/__support/CMakeLists.txt
    libc/test/src/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 253546fe83e0a..10b4abfee7a43 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -178,6 +178,14 @@ def UniStdAPI : PublicAPI<"unistd.h"> {
                "ssize_t", "uid_t", "__getoptargv_t"];
 }
 
+def WCharAPI : PublicAPI<"wchar.h"> {
+  let Types = [
+    "wchar_t",
+    "wint_t",
+    "size_t",
+  ];
+}
+
 def SysRandomAPI : PublicAPI<"sys/random.h"> {
   let Types = ["size_t", "ssize_t"];
 }

diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 5899c1d00c865..39b7e8c84effa 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -193,6 +193,9 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.unlink
     libc.src.unistd.unlinkat
     libc.src.unistd.write
+
+    # wchar.h entrypoints
+    libc.src.wchar.wctob
 )
 
 set(TARGET_LIBM_ENTRYPOINTS

diff  --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index e285105b1224a..aaa75a9dd08cb 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -20,6 +20,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.threads
     libc.include.time
     libc.include.unistd
+    libc.include.wchar
 
     libc.include.arpa_inet
 

diff  --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index d058cb51db0a1..75ef824d2e3f0 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -468,6 +468,15 @@ add_gen_header(
     .llvm-libc-types.tcflag_t
 )
 
+add_gen_header(
+  wchar
+  DEF_FILE wchar.h.def
+  GEN_HDR wchar.h
+  DEPENDS
+    .llvm_libc_common_h
+    .llvm-libc-macros.wchar_macros
+)
+
 if(NOT LLVM_LIBC_FULL_BUILD)
   # We don't install headers in non-fullbuild mode.
   return()

diff  --git a/libc/include/llvm-libc-macros/CMakeLists.txt b/libc/include/llvm-libc-macros/CMakeLists.txt
index 74a5977641f48..58bbf5e2e4317 100644
--- a/libc/include/llvm-libc-macros/CMakeLists.txt
+++ b/libc/include/llvm-libc-macros/CMakeLists.txt
@@ -166,3 +166,9 @@ add_header(
   DEPENDS
     .linux.unistd_macros
 )
+
+add_header(
+  wchar_macros
+  HDR
+    wchar-macros.h
+)

diff  --git a/libc/include/llvm-libc-macros/wchar-macros.h b/libc/include/llvm-libc-macros/wchar-macros.h
new file mode 100644
index 0000000000000..adca41eb01227
--- /dev/null
+++ b/libc/include/llvm-libc-macros/wchar-macros.h
@@ -0,0 +1,16 @@
+//===-- Macros defined in wchar.h header file -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_MACROS_WCHAR_MACROS_H
+#define __LLVM_LIBC_MACROS_WCHAR_MACROS_H
+
+#ifndef WEOF
+#define WEOF 0xffffffffu
+#endif
+
+#endif // __LLVM_LIBC_MACROS_WCHAR_MACROS_H

diff  --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 7be24415b41b3..1df2e7c962c1b 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -84,3 +84,5 @@ add_header(speed_t HDR speed_t.h)
 add_header(tcflag_t HDR tcflag_t.h)
 add_header(struct_termios HDR struct_termios.h DEPENDS .cc_t .speed_t .tcflag_t)
 add_header(__getoptargv_t HDR __getoptargv_t.h)
+add_header(wchar_t HDR wchar_t.h)
+add_header(wint_t HDR wint_t.h)

diff  --git a/libc/include/llvm-libc-types/size_t.h b/libc/include/llvm-libc-types/size_t.h
index ce055d0bc3ba7..8eaf194e05727 100644
--- a/libc/include/llvm-libc-types/size_t.h
+++ b/libc/include/llvm-libc-types/size_t.h
@@ -11,8 +11,9 @@
 
 // Since __need_size_t is defined, we get the definition of size_t from the
 // standalone C header stddef.h. Also, because __need_size_t is defined,
-// including stddef.h will pull only the type size_t and nothing else.a
+// including stddef.h will pull only the type size_t and nothing else.
 #define __need_size_t
 #include <stddef.h>
+#undef __need_size_t
 
 #endif // __LLVM_LIBC_TYPES_SIZE_T_H__

diff  --git a/libc/include/llvm-libc-types/wchar_t.h b/libc/include/llvm-libc-types/wchar_t.h
new file mode 100644
index 0000000000000..9efb5cd8e6652
--- /dev/null
+++ b/libc/include/llvm-libc-types/wchar_t.h
@@ -0,0 +1,19 @@
+//===-- Definition of wchar_t types ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_WCHAR_T_H__
+#define __LLVM_LIBC_TYPES_WCHAR_T_H__
+
+// Since __need_wchar_t is defined, we get the definition of wchar_t from the
+// standalone C header stddef.h. Also, because __need_wchar_t is defined,
+// including stddef.h will pull only the type wchar_t and nothing else.
+#define __need_wchar_t
+#include <stddef.h>
+#undef __need_wchar_t
+
+#endif // __LLVM_LIBC_TYPES_WCHAR_T_H__

diff  --git a/libc/include/llvm-libc-types/wint_t.h b/libc/include/llvm-libc-types/wint_t.h
new file mode 100644
index 0000000000000..cf6ccd7e1ae76
--- /dev/null
+++ b/libc/include/llvm-libc-types/wint_t.h
@@ -0,0 +1,19 @@
+//===-- Definition of wint_t types ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_WINT_T_H__
+#define __LLVM_LIBC_TYPES_WINT_T_H__
+
+// Since __need_wint_t is defined, we get the definition of wint_t from the
+// standalone C header stddef.h. Also, because __need_wint_t is defined,
+// including stddef.h will pull only the type wint_t and nothing else.
+#define __need_wint_t
+#include <stddef.h>
+#undef __need_wint_t
+
+#endif // __LLVM_LIBC_TYPES_WINT_T_H__

diff  --git a/libc/include/wchar.h.def b/libc/include/wchar.h.def
new file mode 100644
index 0000000000000..ac72f80aa0833
--- /dev/null
+++ b/libc/include/wchar.h.def
@@ -0,0 +1,17 @@
+//===-- C standard library header wchar.h ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_WCHAR_H
+#define LLVM_LIBC_WCHAR_H
+
+#include <__llvm-libc-common.h>
+#include <llvm-libc-macros/wchar-macros.h>
+
+%%public_api()
+
+#endif // LLVM_LIBC_WCHAR_H

diff  --git a/libc/spec/spec.td b/libc/spec/spec.td
index 87bafb087d3fb..7a691f255e0b7 100644
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -59,6 +59,10 @@ def SizeTType : NamedType<"size_t">;
 def SizeTPtr : PtrType<SizeTType>;
 def RestrictedSizeTPtr : RestrictedPtrType<SizeTType>;
 
+def WCharType : NamedType<"wchar_t">;
+def WIntType : NamedType<"wint_t">;
+def MBStateType : NamedType<"mbstate_t">;
+
 def LongDoublePtr : PtrType<LongDoubleType>;
 
 def IntMaxTType : NamedType<"intmax_t">;

diff  --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 84277f099a13c..fd732b74fae1e 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -1069,6 +1069,29 @@ def StdC : StandardSpec<"stdc"> {
       ]
   >;
 
+  HeaderSpec WChar = HeaderSpec<
+      "wchar.h",
+      [ // Macros
+        Macro<"WEOF">,
+      ], 
+      [ //Types
+        SizeTType,
+        WIntType,
+        WCharType,
+        MBStateType,
+        StructTmType,
+      ],
+      [], // Enumerations
+      [
+          FunctionSpec<
+              "wctob",
+              RetValSpec<IntType>,
+              [ArgSpec<WIntType>]
+          >,
+      ]
+  >;
+
+
   let Headers = [
     Assert,
     CType,
@@ -1083,5 +1106,6 @@ def StdC : StandardSpec<"stdc"> {
     Signal,
     Threads,
     Time,
+    WChar,
   ];
 }

diff  --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
index 59e7d06871787..9f2f45165e408 100644
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -8,6 +8,7 @@ add_subdirectory(math)
 add_subdirectory(string)
 add_subdirectory(stdlib)
 add_subdirectory(stdio)
+add_subdirectory(wchar)
 
 if(${LIBC_TARGET_OS} STREQUAL "linux")
   add_subdirectory(dirent)

diff  --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index e4eb354aefcd3..30bbf3ae96d26 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -51,6 +51,12 @@ add_header_library(
     ctype_utils.h
 )
 
+add_header_library(
+  wctype_utils
+  HDRS
+    wctype_utils.h
+)
+
 add_header_library(
   str_to_num_result
   HDRS

diff  --git a/libc/src/__support/wctype_utils.h b/libc/src/__support/wctype_utils.h
new file mode 100644
index 0000000000000..7e11b2d8b1725
--- /dev/null
+++ b/libc/src/__support/wctype_utils.h
@@ -0,0 +1,45 @@
+//===-- Collection of utils for implementing wide char functions --*-C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SUPPORT_WCTYPE_UTILS_H
+#define LLVM_LIBC_SRC_SUPPORT_WCTYPE_UTILS_H
+
+#include "src/__support/CPP/optional.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+
+#define __need_wint_t
+#define __need_wchar_t
+#include <stddef.h> // needed for wint_t and wchar_t
+
+namespace __llvm_libc {
+namespace internal {
+
+// ------------------------------------------------------
+// Rationale: Since these classification functions are
+// called in other functions, we will avoid the overhead
+// of a function call by inlining them.
+// ------------------------------------------------------
+
+LIBC_INLINE cpp::optional<int> wctob(wint_t c) {
+  // nullopt is translated to EOF at the callsite, so stdio.h is not needed
+  // here. The mask test equals (c > 127 || c < 0) for signed or unsigned c.
+  if ((c & ~127) != 0)
+    return cpp::nullopt;
+  return static_cast<int>(c);
+}
+
+LIBC_INLINE cpp::optional<wint_t> btowc(int c) {
+  if (0 <= c && c <= 127) // 7-bit ASCII maps to itself
+    return static_cast<wint_t>(c);
+  return cpp::nullopt; // the entrypoint turns this into WEOF
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_SUPPORT_WCTYPE_UTILS_H

diff  --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
new file mode 100644
index 0000000000000..58e168266829c
--- /dev/null
+++ b/libc/src/wchar/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+add_entrypoint_object(
+  wctob
+  SRCS
+    wctob.cpp
+  HDRS
+    wctob.h
+  DEPENDS 
+    libc.src.__support.wctype_utils
+)

diff  --git a/libc/src/wchar/btowc.cpp b/libc/src/wchar/btowc.cpp
new file mode 100644
index 0000000000000..6db888c1aa09c
--- /dev/null
+++ b/libc/src/wchar/btowc.cpp
@@ -0,0 +1,26 @@
+//===-- Implementation of btowc -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/btowc.h"
+#include "src/__support/common.h"
+#include "src/__support/wctype_utils.h"
+
+#include <stdio.h> // for EOF.
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(wint_t, btowc, (int c)) {
+  // Signature matches the declaration in btowc.h: takes a byte as int and
+  // returns wint_t. An empty result from the helper is reported as WEOF.
+  auto result = internal::btowc(c);
+  if (result.has_value())
+    return result.value();
+  return WEOF;
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/wchar/btowc.h b/libc/src/wchar/btowc.h
new file mode 100644
index 0000000000000..ab37738e8c67c
--- /dev/null
+++ b/libc/src/wchar/btowc.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for btowc -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_BTOWC_H
+#define LLVM_LIBC_SRC_WCHAR_BTOWC_H
+
+#include <wchar.h>
+
+namespace __llvm_libc {
+
+wint_t btowc(int c); // Wide form of byte c, or WEOF when it has none.
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_WCHAR_BTOWC_H

diff  --git a/libc/src/wchar/wctob.cpp b/libc/src/wchar/wctob.cpp
new file mode 100644
index 0000000000000..274818a37773a
--- /dev/null
+++ b/libc/src/wchar/wctob.cpp
@@ -0,0 +1,26 @@
+//===-- Implementation of wctob -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wctob.h"
+#include "src/__support/common.h"
+#include "src/__support/wctype_utils.h"
+
+#include <stdio.h> // for EOF.
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(int, wctob, (wint_t c)) {
+  // The shared helper signals "no single-byte form" with an empty optional;
+  // this entrypoint is where that case becomes EOF.
+  auto narrow = internal::wctob(c);
+  if (!narrow.has_value())
+    return EOF;
+  return narrow.value();
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/wchar/wctob.h b/libc/src/wchar/wctob.h
new file mode 100644
index 0000000000000..5c0f229436d62
--- /dev/null
+++ b/libc/src/wchar/wctob.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for wctob -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCTOB_H
+#define LLVM_LIBC_SRC_WCHAR_WCTOB_H
+
+#include <wchar.h>
+
+namespace __llvm_libc {
+
+int wctob(wint_t c); // Single-byte form of c, or EOF when it has none.
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCTOB_H

diff  --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt
index 6c3c987e3291d..6b1c7452fc284 100644
--- a/libc/test/src/CMakeLists.txt
+++ b/libc/test/src/CMakeLists.txt
@@ -35,6 +35,7 @@ add_subdirectory(string)
 add_subdirectory(stdlib)
 add_subdirectory(inttypes)
 add_subdirectory(stdio)
+add_subdirectory(wchar)
 
 if(${LIBC_TARGET_OS} STREQUAL "linux")
   add_subdirectory(fcntl)

diff  --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
new file mode 100644
index 0000000000000..88b68c43491a6
--- /dev/null
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -0,0 +1,21 @@
+add_libc_testsuite(libc_wchar_unittests)
+
+add_libc_unittest(
+  btowc_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    btowc_test.cpp
+  DEPENDS
+    libc.src.wchar.btowc
+)
+
+add_libc_unittest(
+  wctob_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wctob_test.cpp
+  DEPENDS
+    libc.src.wchar.wctob
+)

diff  --git a/libc/test/src/wchar/btowc_test.cpp b/libc/test/src/wchar/btowc_test.cpp
new file mode 100644
index 0000000000000..cfac485cdecc7
--- /dev/null
+++ b/libc/test/src/wchar/btowc_test.cpp
@@ -0,0 +1,24 @@
+//===-- Unittests for btowc ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <wchar.h> //for WEOF
+
+#include "src/wchar/btowc.h"
+
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcBtowc, DefaultLocale) {
+  // Loops through every unsigned char value (0..255 inclusive), verifying
+  // that ascii returns itself and everything else returns WEOF.
+  for (int c = 0; c <= 255; ++c) {
+    if (c < 128)
+      EXPECT_EQ(__llvm_libc::btowc(c), static_cast<wint_t>(c));
+    else
+      EXPECT_EQ(__llvm_libc::btowc(c), WEOF);
+  }
+}

diff  --git a/libc/test/src/wchar/wctob_test.cpp b/libc/test/src/wchar/wctob_test.cpp
new file mode 100644
index 0000000000000..7a8d6eea0c82a
--- /dev/null
+++ b/libc/test/src/wchar/wctob_test.cpp
@@ -0,0 +1,24 @@
+//===-- Unittests for wctob ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h> //for EOF
+
+#include "src/wchar/wctob.h"
+
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWctob, DefaultLocale) {
+  // Walks the low wide-character values: ascii code points must map to
+  // themselves, and every value above them must be reported as EOF.
+  for (wint_t wc = 0; wc < 32767; ++wc) {
+    if (wc >= 128)
+      EXPECT_EQ(__llvm_libc::wctob(wc), EOF);
+    else
+      EXPECT_EQ(__llvm_libc::wctob(wc), static_cast<int>(wc));
+  }
+}


        


More information about the libc-commits mailing list