[llvm-branch-commits] [libcxx] de698ae - [libcxx] Convert paths to/from the right narrow code page for narrow strings on windows

Martin Storsjö via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Dec 18 01:30:04 PST 2020


Author: Martin Storsjö
Date: 2020-12-18T11:24:52+02:00
New Revision: de698ae73444b5160dd6b8d768b30d6764be004e

URL: https://github.com/llvm/llvm-project/commit/de698ae73444b5160dd6b8d768b30d6764be004e
DIFF: https://github.com/llvm/llvm-project/commit/de698ae73444b5160dd6b8d768b30d6764be004e.diff

LOG: [libcxx] Convert paths to/from the right narrow code page for narrow strings on windows

On Windows, narrow, char-based paths are normally not UTF-8; they use
one of many native code pages, and that is the encoding that system
functions operating on files assume when given such paths/file names.

Differential Revision: https://reviews.llvm.org/D91137

Added: 
    

Modified: 
    libcxx/include/filesystem
    libcxx/src/filesystem/filesystem_common.h
    libcxx/src/filesystem/operations.cpp

Removed: 
    


################################################################################
diff  --git a/libcxx/include/filesystem b/libcxx/include/filesystem
index adc863f052db..e39790c50955 100644
--- a/libcxx/include/filesystem
+++ b/libcxx/include/filesystem
@@ -690,6 +690,13 @@ typedef string __path_string;
 typedef char __path_value;
 #endif
 
+#if defined(_LIBCPP_WIN32API)
+_LIBCPP_FUNC_VIS
+size_t __wide_to_char(const wstring&, char*, size_t);  // wide -> narrow; defined in src/filesystem/operations.cpp; callers here pass (nullptr, 0) first to get the length
+_LIBCPP_FUNC_VIS
+size_t __char_to_wide(const string&, wchar_t*, size_t);  // narrow -> wide; defined in src/filesystem/operations.cpp; same two-call usage
+#endif
+
 template <class _ECharT>
 struct _PathCVT;
 
@@ -793,6 +800,48 @@ struct _PathCVT<__path_value> {
 };
 
 #if defined(_LIBCPP_WIN32API)
+template <>
+struct _PathCVT<char> {
+  // Narrow (char) input: every overload ends in __append_string -> __char_to_wide.
+  static void __append_string(__path_string& __dest, const basic_string<char>& __str) {
+    // Size query first (nullptr, 0), then convert straight into the grown tail.
+    const size_t __needed = __char_to_wide(__str, nullptr, 0);
+    const size_t __old_len = __dest.size();
+    __dest.resize(__old_len + __needed);
+    __char_to_wide(__str, const_cast<__path_value*>(__dest.data()) + __old_len, __needed);
+  }
+
+  template <class _Iter>
+  static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type
+  __append_range(__path_string& __dest, _Iter __b, _Iter __e) {
+    // Materialize the range as a narrow string, since __append_string takes one.
+    __append_string(__dest, basic_string<char>(__b, __e));
+  }
+
+  template <class _Iter>
+  static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type
+  __append_range(__path_string& __dest, _Iter __b, _Iter __e) {
+    // Forward-iterator overload; same body as the input-iterator one.
+    __append_string(__dest, basic_string<char>(__b, __e));
+  }
+
+  template <class _Iter>
+  static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) {
+    // Sentinel-terminated input: collect characters up to (excluding) char{}.
+    basic_string<char> __buf;
+    for (const char __nul = char{}; *__b != __nul; ++__b)
+      __buf.push_back(*__b);
+    __append_string(__dest, __buf);
+  }
+
+  template <class _Source>
+  static void __append_source(__path_string& __dest, _Source const& __s) {
+    // Let __is_pathable pick the begin/end (or sentinel) for this source type.
+    using _Traits = __is_pathable<_Source>;
+    __append_range(__dest, _Traits::__range_begin(__s), _Traits::__range_end(__s));
+  }
+};
+
 template <class _ECharT>
 struct _PathExport {
   typedef __narrow_to_utf8<sizeof(wchar_t) * __CHAR_BIT__> _Narrower;
@@ -806,6 +855,17 @@ struct _PathExport {
   }
 };
 
+template <>
+struct _PathExport<char> {  // exports the wide native path string as narrow chars via __wide_to_char
+  template <class _Str>
+  static void __append(_Str& __dest, const __path_string& __src) {
+    size_t __size = __wide_to_char(__src, nullptr, 0);  // (nullptr, 0): length query only
+    size_t __pos = __dest.size();
+    __dest.resize(__pos + __size);  // grow past the existing contents so the write at __pos stays in bounds
+    __wide_to_char(__src, const_cast<char*>(__dest.data()) + __pos, __size);
+  }
+};
+
 template <>
 struct _PathExport<wchar_t> {
   template <class _Str>
@@ -1110,7 +1170,11 @@ public:
     return string<char>();
   }
   _LIBCPP_INLINE_VISIBILITY __u8_string u8string() const {  // returns __pn_ re-encoded through __narrow_to_utf8
-    return string<__u8_string::value_type>();
+    using _CVT = __narrow_to_utf8<sizeof(wchar_t) * __CHAR_BIT__>;  // converter selected by wchar_t's width in bits
+    __u8_string __s;
+    __s.reserve(__pn_.size());  // initial capacity: one slot per stored code unit; back_inserter grows __s as needed
+    _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + __pn_.size());  // converts all of __pn_, appending to __s
+    return __s;
   }
 
   _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const {
@@ -1373,9 +1437,42 @@ _LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T
       is_same<typename __is_pathable<_InputIt>::__char_type, char>::value,
       "u8path(Iter, Iter) requires Iter have a value_type of type 'char'"
       " or 'char8_t'");
+#if defined(_LIBCPP_WIN32API)
+  string __tmp(__f, __l);
+  using _CVT = __widen_from_utf8<sizeof(wchar_t) * __CHAR_BIT__>;
+  _VSTD::wstring __w;
+  __w.reserve(__tmp.size());
+  _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size());
+  return path(__w);
+#else
   return path(__f, __l);
+#endif /* !_LIBCPP_WIN32API */
 }
 
+#if defined(_LIBCPP_WIN32API)
+template <class _InputIt>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T
+    typename enable_if<__is_pathable<_InputIt>::value, path>::type
+    u8path(_InputIt __f, _NullSentinel) {
+  static_assert(
+#ifndef _LIBCPP_NO_HAS_CHAR8_T
+      is_same<typename __is_pathable<_InputIt>::__char_type, char8_t>::value ||
+#endif
+      is_same<typename __is_pathable<_InputIt>::__char_type, char>::value,
+      "u8path(Iter, Iter) requires Iter have a value_type of type 'char'"
+      " or 'char8_t'");
+  // Collect the sentinel-terminated input, then widen it via __widen_from_utf8.
+  string __utf8;
+  for (const char __nul = char{}; *__f != __nul; ++__f)
+    __utf8.push_back(*__f);
+  using _Widen = __widen_from_utf8<sizeof(wchar_t) * __CHAR_BIT__>;
+  _VSTD::wstring __wide;
+  __wide.reserve(__utf8.size());
+  _Widen()(back_inserter(__wide), __utf8.data(), __utf8.data() + __utf8.size());
+  return path(__wide);
+}
+#endif /* _LIBCPP_WIN32API */
+
 template <class _Source>
 _LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T
     typename enable_if<__is_pathable<_Source>::value, path>::type
@@ -1387,7 +1484,12 @@ _LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T
       is_same<typename __is_pathable<_Source>::__char_type, char>::value,
       "u8path(Source const&) requires Source have a character type of type "
       "'char' or 'char8_t'");
+#if defined(_LIBCPP_WIN32API)
+  using _Traits = __is_pathable<_Source>;
+  return u8path(__unwrap_iter(_Traits::__range_begin(__s)), __unwrap_iter(_Traits::__range_end(__s)));
+#else
   return path(__s);
+#endif
 }
 
 class _LIBCPP_TYPE_VIS path::iterator {

diff  --git a/libcxx/src/filesystem/filesystem_common.h b/libcxx/src/filesystem/filesystem_common.h
index 4175ea57464f..5376e32ffa02 100644
--- a/libcxx/src/filesystem/filesystem_common.h
+++ b/libcxx/src/filesystem/filesystem_common.h
@@ -126,6 +126,12 @@ template <>
 bool error_value<bool>() {
   return false;
 }
+#if __SIZEOF_SIZE_T__ != __SIZEOF_LONG_LONG__  // NOTE(review): presumably avoids clashing with the uintmax_t specialization below when the types coincide - confirm
+template <>
+size_t error_value<size_t>() {
+  return static_cast<size_t>(-1);  // largest size_t value serves as the error sentinel
+}
+#endif
 template <>
 uintmax_t error_value<uintmax_t>() {
   return uintmax_t(-1);

diff  --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp
index 12592f2cc026..70b531e8ea3b 100644
--- a/libcxx/src/filesystem/operations.cpp
+++ b/libcxx/src/filesystem/operations.cpp
@@ -17,9 +17,15 @@
 
 #include "filesystem_common.h"
 
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/statvfs.h>
+#if defined(_LIBCPP_WIN32API)
+# define WIN32_LEAN_AND_MEAN
+# define NOMINMAX
+# include <windows.h>
+#else
+# include <unistd.h>
+# include <sys/stat.h>
+# include <sys/statvfs.h>
+#endif
 #include <time.h>
 #include <fcntl.h> /* values for fchmodat */
 
@@ -1680,6 +1686,36 @@ path::iterator& path::iterator::__decrement() {
   return *this;
 }
 
+#if defined(_LIBCPP_WIN32API)
+////////////////////////////////////////////////////////////////////////////
+// Windows path conversions
+size_t __wide_to_char(const wstring &str, char *out, size_t outlen) {
+  // Wide -> narrow in the code page the file APIs use; returns the count reported by the API.
+  if (str.empty()) return 0;
+  ErrorHandler<size_t> err("__wide_to_char", nullptr);
+  const UINT file_api_cp = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
+  BOOL lossy = FALSE;  // out-param: set when a character had to be replaced by the default char
+  const int written = WideCharToMultiByte(file_api_cp, 0, str.data(), str.size(), out,
+                                          outlen, nullptr, &lossy);
+  if (!(written > 0 && !lossy))
+    return err.report(errc::illegal_byte_sequence);
+  return written;
+}
+
+size_t __char_to_wide(const string &str, wchar_t *out, size_t outlen) {
+  // Narrow -> wide in the code page the file APIs use; MB_ERR_INVALID_CHARS rejects bad input.
+  if (str.empty()) return 0;
+  ErrorHandler<size_t> err("__char_to_wide", nullptr);
+  const UINT file_api_cp = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
+  const int written = MultiByteToWideChar(file_api_cp, MB_ERR_INVALID_CHARS, str.data(),
+                                          str.size(), out, outlen);
+  if (written < 1)
+    return err.report(errc::illegal_byte_sequence);
+  return written;
+}
+#endif
+
+
 ///////////////////////////////////////////////////////////////////////////////
 //                           directory entry definitions
 ///////////////////////////////////////////////////////////////////////////////


        


More information about the llvm-branch-commits mailing list