[libcxx-commits] [libcxx] e83e0ca - [libcxx] Make filesystem::path::value_type wchar_t on windows

Martin Storsjö via libcxx-commits libcxx-commits at lists.llvm.org
Fri Dec 18 01:25:18 PST 2020


Author: Martin Storsjö
Date: 2020-12-18T11:24:52+02:00
New Revision: e83e0cac041bc071301f8399bb5c32b2529fc83f

URL: https://github.com/llvm/llvm-project/commit/e83e0cac041bc071301f8399bb5c32b2529fc83f
DIFF: https://github.com/llvm/llvm-project/commit/e83e0cac041bc071301f8399bb5c32b2529fc83f.diff

LOG: [libcxx] Make filesystem::path::value_type wchar_t on windows

Also set the preferred separator to backslash.

libc++ doesn't compile successfully for Windows prior to this change,
and this change on its own isn't enough to make it compile successfully
either, but it is the first stepping stone towards making it work correctly.

Most of operations.cpp will need to be touched, both for calling
functions that take wchar paths and for using other Windows-specific
functions instead of the POSIX functions used so far; that is
handled in later commits.

This commit changes parts of operations.cpp to generalize the string
type handling in code that doesn't touch system functions.

Differential Revision: https://reviews.llvm.org/D91135

Added: 
    

Modified: 
    libcxx/include/filesystem
    libcxx/src/filesystem/filesystem_common.h
    libcxx/src/filesystem/operations.cpp

Removed: 
    


################################################################################
diff  --git a/libcxx/include/filesystem b/libcxx/include/filesystem
index 764ec6573a50..7aeabeb2d1ec 100644
--- a/libcxx/include/filesystem
+++ b/libcxx/include/filesystem
@@ -568,9 +568,19 @@ struct __can_convert_char<char32_t> {
 template <class _ECharT>
 typename enable_if<__can_convert_char<_ECharT>::value, bool>::type
 __is_separator(_ECharT __e) {
+#if defined(_LIBCPP_WIN32API)
+  return __e == _ECharT('/') || __e == _ECharT('\\');
+#else
   return __e == _ECharT('/');
+#endif
 }
 
+#ifndef _LIBCPP_NO_HAS_CHAR8_T
+typedef u8string __u8_string;
+#else
+typedef string __u8_string;
+#endif
+
 struct _NullSentinel {};
 
 template <class _Tp>
@@ -672,6 +682,14 @@ struct __is_pathable<_Tp, false, true, false> : __is_pathable_char_array<_Tp> {
 template <class _Tp>
 struct __is_pathable<_Tp, false, false, true> : __is_pathable_iter<_Tp> {};
 
+#if defined(_LIBCPP_WIN32API)
+typedef wstring __path_string;
+typedef wchar_t __path_value;
+#else
+typedef string __path_string;
+typedef char __path_value;
+#endif
+
 template <class _ECharT>
 struct _PathCVT;
 
@@ -682,24 +700,40 @@ struct _PathCVT {
                 "Char type not convertible");
 
   typedef __narrow_to_utf8<sizeof(_ECharT) * __CHAR_BIT__> _Narrower;
+#if defined(_LIBCPP_WIN32API)
+  typedef __widen_from_utf8<sizeof(wchar_t) * __CHAR_BIT__> _Widener;
+#endif
 
-  static void __append_range(string& __dest, _ECharT const* __b,
+  static void __append_range(__path_string& __dest, _ECharT const* __b,
                              _ECharT const* __e) {
+#if defined(_LIBCPP_WIN32API)
+    string __utf8;
+    _Narrower()(back_inserter(__utf8), __b, __e);
+    _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size());
+#else
     _Narrower()(back_inserter(__dest), __b, __e);
+#endif
   }
 
   template <class _Iter>
-  static void __append_range(string& __dest, _Iter __b, _Iter __e) {
+  static void __append_range(__path_string& __dest, _Iter __b, _Iter __e) {
     static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload");
     if (__b == __e)
       return;
     basic_string<_ECharT> __tmp(__b, __e);
+#if defined(_LIBCPP_WIN32API)
+    string __utf8;
+    _Narrower()(back_inserter(__utf8), __tmp.data(),
+                __tmp.data() + __tmp.length());
+    _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size());
+#else
     _Narrower()(back_inserter(__dest), __tmp.data(),
                 __tmp.data() + __tmp.length());
+#endif
   }
 
   template <class _Iter>
-  static void __append_range(string& __dest, _Iter __b, _NullSentinel) {
+  static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) {
     static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload");
     const _ECharT __sentinel = _ECharT{};
     if (*__b == __sentinel)
@@ -707,12 +741,19 @@ struct _PathCVT {
     basic_string<_ECharT> __tmp;
     for (; *__b != __sentinel; ++__b)
       __tmp.push_back(*__b);
+#if defined(_LIBCPP_WIN32API)
+    string __utf8;
+    _Narrower()(back_inserter(__utf8), __tmp.data(),
+                __tmp.data() + __tmp.length());
+    _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size());
+#else
     _Narrower()(back_inserter(__dest), __tmp.data(),
                 __tmp.data() + __tmp.length());
+#endif
   }
 
   template <class _Source>
-  static void __append_source(string& __dest, _Source const& __s) {
+  static void __append_source(__path_string& __dest, _Source const& __s) {
     using _Traits = __is_pathable<_Source>;
     __append_range(__dest, _Traits::__range_begin(__s),
                    _Traits::__range_end(__s));
@@ -721,36 +762,79 @@ struct _PathCVT {
 #endif // !_LIBCPP_HAS_NO_LOCALIZATION
 
 template <>
-struct _PathCVT<char> {
+struct _PathCVT<__path_value> {
 
   template <class _Iter>
   static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type
-  __append_range(string& __dest, _Iter __b, _Iter __e) {
+  __append_range(__path_string& __dest, _Iter __b, _Iter __e) {
     for (; __b != __e; ++__b)
       __dest.push_back(*__b);
   }
 
   template <class _Iter>
   static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type
-  __append_range(string& __dest, _Iter __b, _Iter __e) {
+  __append_range(__path_string& __dest, _Iter __b, _Iter __e) {
     __dest.__append_forward_unsafe(__b, __e);
   }
 
   template <class _Iter>
-  static void __append_range(string& __dest, _Iter __b, _NullSentinel) {
+  static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) {
     const char __sentinel = char{};
     for (; *__b != __sentinel; ++__b)
       __dest.push_back(*__b);
   }
 
   template <class _Source>
-  static void __append_source(string& __dest, _Source const& __s) {
+  static void __append_source(__path_string& __dest, _Source const& __s) {
     using _Traits = __is_pathable<_Source>;
     __append_range(__dest, _Traits::__range_begin(__s),
                    _Traits::__range_end(__s));
   }
 };
 
+#if defined(_LIBCPP_WIN32API)
+template <class _ECharT>
+struct _PathExport {
+  typedef __narrow_to_utf8<sizeof(wchar_t) * __CHAR_BIT__> _Narrower;
+  typedef __widen_from_utf8<sizeof(_ECharT) * __CHAR_BIT__> _Widener;
+
+  template <class _Str>
+  static void __append(_Str& __dest, const __path_string& __src) {
+    string __utf8;
+    _Narrower()(back_inserter(__utf8), __src.data(), __src.data() + __src.size());
+    _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size());
+  }
+};
+
+template <>
+struct _PathExport<wchar_t> {
+  template <class _Str>
+  static void __append(_Str& __dest, const __path_string& __src) {
+    __dest.append(__src.begin(), __src.end());
+  }
+};
+
+template <>
+struct _PathExport<char16_t> {
+  template <class _Str>
+  static void __append(_Str& __dest, const __path_string& __src) {
+    __dest.append(__src.begin(), __src.end());
+  }
+};
+
+#ifndef _LIBCPP_NO_HAS_CHAR8_T
+template <>
+struct _PathExport<char8_t> {
+  typedef __narrow_to_utf8<sizeof(wchar_t) * __CHAR_BIT__> _Narrower;
+
+  template <class _Str>
+  static void __append(_Str& __dest, const __path_string& __src) {
+    _Narrower()(back_inserter(__dest), __src.data(), __src.data() + __src.size());
+  }
+};
+#endif /* !_LIBCPP_NO_HAS_CHAR8_T */
+#endif /* _LIBCPP_WIN32API */
+
 class _LIBCPP_TYPE_VIS path {
   template <class _SourceOrIter, class _Tp = path&>
   using _EnableIfPathable =
@@ -763,10 +847,15 @@ class _LIBCPP_TYPE_VIS path {
   using _SourceCVT = _PathCVT<_SourceChar<_Tp> >;
 
 public:
+#if defined(_LIBCPP_WIN32API)
+  typedef wchar_t value_type;
+  static constexpr value_type preferred_separator = L'\\';
+#else
   typedef char value_type;
-  typedef basic_string<value_type> string_type;
-  typedef _VSTD::string_view __string_view;
   static constexpr value_type preferred_separator = '/';
+#endif
+  typedef basic_string<value_type> string_type;
+  typedef basic_string_view<value_type> __string_view;
 
   enum class _LIBCPP_ENUM_VIS format : unsigned char {
     auto_format,
@@ -1000,6 +1089,52 @@ public:
 
   _LIBCPP_INLINE_VISIBILITY operator string_type() const { return __pn_; }
 
+#if defined(_LIBCPP_WIN32API)
+  _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { return __pn_; }
+
+  _VSTD::wstring generic_wstring() const { return __pn_; }
+
+#if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+  template <class _ECharT, class _Traits = char_traits<_ECharT>,
+            class _Allocator = allocator<_ECharT> >
+  basic_string<_ECharT, _Traits, _Allocator>
+  string(const _Allocator& __a = _Allocator()) const {
+    using _Str = basic_string<_ECharT, _Traits, _Allocator>;
+    _Str __s(__a);
+    __s.reserve(__pn_.size());
+    _PathExport<_ECharT>::__append(__s, __pn_);
+    return __s;
+  }
+
+  _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const {
+    return string<char>();
+  }
+  _LIBCPP_INLINE_VISIBILITY __u8_string u8string() const {
+    return string<__u8_string::value_type>();
+  }
+
+  _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const {
+    return string<char16_t>();
+  }
+  _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const {
+    return string<char32_t>();
+  }
+
+  // generic format observers
+  template <class _ECharT, class _Traits = char_traits<_ECharT>,
+            class _Allocator = allocator<_ECharT> >
+  basic_string<_ECharT, _Traits, _Allocator>
+  generic_string(const _Allocator& __a = _Allocator()) const {
+    return string<_ECharT, _Traits, _Allocator>(__a);
+  }
+
+  _VSTD::string generic_string() const { return generic_string<char>(); }
+  _VSTD::u16string generic_u16string() const { return generic_string<char16_t>(); }
+  _VSTD::u32string generic_u32string() const { return generic_string<char32_t>(); }
+  __u8_string generic_u8string() const { return u8string(); }
+#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */
+#else /* _LIBCPP_WIN32API */
+
   _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const { return __pn_; }
 #ifndef _LIBCPP_NO_HAS_CHAR8_T
   _LIBCPP_INLINE_VISIBILITY _VSTD::u8string u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); }
@@ -1029,7 +1164,7 @@ public:
   _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const {
     return string<char32_t>();
   }
-#endif
+#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */
 
   // generic format observers
   _VSTD::string generic_string() const { return __pn_; }
@@ -1050,7 +1185,8 @@ public:
   _VSTD::wstring generic_wstring() const { return string<wchar_t>(); }
   _VSTD::u16string generic_u16string() const { return string<char16_t>(); }
   _VSTD::u32string generic_u32string() const { return string<char32_t>(); }
-#endif
+#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */
+#endif /* !_LIBCPP_WIN32API */
 
 private:
   int __compare(__string_view) const;
@@ -1157,8 +1293,8 @@ public:
 #if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
   template <class _CharT, class _Traits>
   _LIBCPP_INLINE_VISIBILITY friend
-      typename enable_if<is_same<_CharT, char>::value &&
-                             is_same<_Traits, char_traits<char> >::value,
+      typename enable_if<is_same<_CharT, value_type>::value &&
+                             is_same<_Traits, char_traits<value_type> >::value,
                          basic_ostream<_CharT, _Traits>&>::type
       operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) {
     __os << _VSTD::__quoted(__p.native());
@@ -1167,8 +1303,8 @@ public:
 
   template <class _CharT, class _Traits>
   _LIBCPP_INLINE_VISIBILITY friend
-      typename enable_if<!is_same<_CharT, char>::value ||
-                             !is_same<_Traits, char_traits<char> >::value,
+      typename enable_if<!is_same<_CharT, value_type>::value ||
+                             !is_same<_Traits, char_traits<value_type> >::value,
                          basic_ostream<_CharT, _Traits>&>::type
       operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) {
     __os << _VSTD::__quoted(__p.string<_CharT, _Traits>());

diff  --git a/libcxx/src/filesystem/filesystem_common.h b/libcxx/src/filesystem/filesystem_common.h
index a82f6511368a..4175ea57464f 100644
--- a/libcxx/src/filesystem/filesystem_common.h
+++ b/libcxx/src/filesystem/filesystem_common.h
@@ -38,6 +38,12 @@
 #pragma GCC diagnostic ignored "-Wunused-function"
 #endif
 
+#if defined(_LIBCPP_WIN32API)
+#define PS(x) (L##x)
+#else
+#define PS(x) (x)
+#endif
+
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
 namespace detail {
@@ -94,8 +100,8 @@ static string format_string_imp(const char* msg, ...) {
   return result;
 }
 
-const char* unwrap(string const& s) { return s.c_str(); }
-const char* unwrap(path const& p) { return p.native().c_str(); }
+const path::value_type* unwrap(path::string_type const& s) { return s.c_str(); }
+const path::value_type* unwrap(path const& p) { return p.native().c_str(); }
 template <class Arg>
 Arg const& unwrap(Arg const& a) {
   static_assert(!is_class<Arg>::value, "cannot pass class here");

diff  --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp
index fb27d54cf653..12592f2cc026 100644
--- a/libcxx/src/filesystem/operations.cpp
+++ b/libcxx/src/filesystem/operations.cpp
@@ -171,11 +171,14 @@ struct PathParser {
     switch (State) {
     case PS_BeforeBegin:
     case PS_AtEnd:
-      return "";
+      return PS("");
     case PS_InRootDir:
-      return "/";
+      if (RawEntry[0] == '\\')
+        return PS("\\");
+      else
+        return PS("/");
     case PS_InTrailingSep:
-      return "";
+      return PS("");
     case PS_InRootName:
     case PS_InFilenames:
       return RawEntry;
@@ -283,8 +286,8 @@ struct PathParser {
 };
 
 string_view_pair separate_filename(string_view_t const& s) {
-  if (s == "." || s == ".." || s.empty())
-    return string_view_pair{s, ""};
+  if (s == PS(".") || s == PS("..") || s.empty())
+    return string_view_pair{s, PS("")};
   auto pos = s.find_last_of('.');
   if (pos == string_view_t::npos || pos == 0)
     return string_view_pair{s, string_view_t{}};
@@ -495,19 +498,25 @@ _FilesystemClock::time_point _FilesystemClock::now() noexcept {
 
 filesystem_error::~filesystem_error() {}
 
+#if defined(_LIBCPP_WIN32API)
+#define PS_FMT "%ls"
+#else
+#define PS_FMT "%s"
+#endif
+
 void filesystem_error::__create_what(int __num_paths) {
   const char* derived_what = system_error::what();
   __storage_->__what_ = [&]() -> string {
-    const char* p1 = path1().native().empty() ? "\"\"" : path1().c_str();
-    const char* p2 = path2().native().empty() ? "\"\"" : path2().c_str();
+    const path::value_type* p1 = path1().native().empty() ? PS("\"\"") : path1().c_str();
+    const path::value_type* p2 = path2().native().empty() ? PS("\"\"") : path2().c_str();
     switch (__num_paths) {
     default:
       return detail::format_string("filesystem error: %s", derived_what);
     case 1:
-      return detail::format_string("filesystem error: %s [%s]", derived_what,
+      return detail::format_string("filesystem error: %s [" PS_FMT "]", derived_what,
                                    p1);
     case 2:
-      return detail::format_string("filesystem error: %s [%s] [%s]",
+      return detail::format_string("filesystem error: %s [" PS_FMT "] [" PS_FMT "]",
                                    derived_what, p1, p2);
     }
   }();
@@ -1222,10 +1231,10 @@ path __temp_directory_path(error_code* ec) {
   error_code m_ec;
   file_status st = detail::posix_stat(p, &m_ec);
   if (!status_known(st))
-    return err.report(m_ec, "cannot access path \"%s\"", p);
+    return err.report(m_ec, "cannot access path \"" PS_FMT "\"", p);
 
   if (!exists(st) || !is_directory(st))
-    return err.report(errc::not_a_directory, "path \"%s\" is not a directory",
+    return err.report(errc::not_a_directory, "path \"" PS_FMT "\" is not a directory",
                       p);
 
   return p;
@@ -1281,7 +1290,7 @@ path& path::replace_extension(path const& replacement) {
   }
   if (!replacement.empty()) {
     if (replacement.native()[0] != '.') {
-      __pn_ += ".";
+      __pn_ += PS(".");
     }
     __pn_.append(replacement.__pn_);
   }
@@ -1403,11 +1412,11 @@ enum PathPartKind : unsigned char {
 static PathPartKind ClassifyPathPart(string_view_t Part) {
   if (Part.empty())
     return PK_TrailingSep;
-  if (Part == ".")
+  if (Part == PS("."))
     return PK_Dot;
-  if (Part == "..")
+  if (Part == PS(".."))
     return PK_DotDot;
-  if (Part == "/")
+  if (Part == PS("/"))
     return PK_RootSep;
   return PK_Filename;
 }
@@ -1456,7 +1465,7 @@ path path::lexically_normal() const {
         NewPathSize -= Parts.back().first.size();
         Parts.pop_back();
       } else if (LastKind != PK_RootSep)
-        AddPart(PK_DotDot, "..");
+        AddPart(PK_DotDot, PS(".."));
       MaybeNeedTrailingSep = LastKind == PK_Filename;
       break;
     }
@@ -1471,7 +1480,7 @@ path path::lexically_normal() const {
   }
   // [fs.path.generic]p6.8: If the path is empty, add a dot.
   if (Parts.empty())
-    return ".";
+    return PS(".");
 
   // [fs.path.generic]p6.7: If the last filename is dot-dot, remove any
   // trailing directory-separator.
@@ -1483,7 +1492,7 @@ path path::lexically_normal() const {
     Result /= PK.first;
 
   if (NeedTrailingSep)
-    Result /= "";
+    Result /= PS("");
 
   return Result;
 }
@@ -1492,9 +1501,9 @@ static int DetermineLexicalElementCount(PathParser PP) {
   int Count = 0;
   for (; PP; ++PP) {
     auto Elem = *PP;
-    if (Elem == "..")
+    if (Elem == PS(".."))
       --Count;
-    else if (Elem != "." && Elem != "")
+    else if (Elem != PS(".") && Elem != PS(""))
       ++Count;
   }
   return Count;
@@ -1541,15 +1550,15 @@ path path::lexically_relative(const path& base) const {
     return {};
 
   // if n == 0 and (a == end() || a->empty()), returns path("."); otherwise
-  if (ElemCount == 0 && (PP.atEnd() || *PP == ""))
-    return ".";
+  if (ElemCount == 0 && (PP.atEnd() || *PP == PS("")))
+    return PS(".");
 
   // return a path constructed with 'n' dot-dot elements, followed by the the
   // elements of '*this' after the mismatch.
   path Result;
   // FIXME: Reserve enough room in Result that it won't have to re-allocate.
   while (ElemCount--)
-    Result /= "..";
+    Result /= PS("..");
   for (; PP; ++PP)
     Result /= *PP;
   return Result;
@@ -1562,7 +1571,7 @@ static int CompareRootName(PathParser *LHS, PathParser *RHS) {
     return 0;
 
   auto GetRootName = [](PathParser *Parser) -> string_view_t {
-    return Parser->inRootName() ? **Parser : "";
+    return Parser->inRootName() ? **Parser : PS("");
   };
   int res = GetRootName(LHS).compare(GetRootName(RHS));
   ConsumeRootName(LHS);


        


More information about the libcxx-commits mailing list