[libcxx-commits] [libcxx] [libcxx] Caches file attributes during directory iteration. (PR #93316)

Eduard Satdarov via libcxx-commits libcxx-commits at lists.llvm.org
Fri May 24 09:13:39 PDT 2024


https://github.com/ed-sat created https://github.com/llvm/llvm-project/pull/93316

Adds caching of file attributes during directory iteration on Windows. This improves performance when working with the files in a directory.

>From 08ca1dcb567ee9ec6ed14d5f296e15e20cb1334a Mon Sep 17 00:00:00 2001
From: Eduard Satdarov <sath at yandex-team.ru>
Date: Fri, 24 May 2024 16:41:35 +0300
Subject: [PATCH] [libcxx] Cache file attributes during directory iteration.

---
 libcxx/include/__filesystem/directory_entry.h | 38 +++++++++++++++++++
 libcxx/src/filesystem/directory_iterator.cpp  | 12 +++---
 libcxx/src/filesystem/file_descriptor.h       | 16 +++++---
 3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h
index 016ad94a853dc..6d35d92ebf483 100644
--- a/libcxx/include/__filesystem/directory_entry.h
+++ b/libcxx/include/__filesystem/directory_entry.h
@@ -200,6 +200,8 @@ class directory_entry {
     _Empty,
     _IterSymlink,
     _IterNonSymlink,
+    _IterCachedSymlink,
+    _IterCachedNonSymlink,
     _RefreshSymlink,
     _RefreshSymlinkUnresolved,
     _RefreshNonSymlink
@@ -241,6 +243,28 @@ class directory_entry {
     return __data;
   }
 
+  // Builds the cache record for an entry produced by directory iteration when the type, size,
+  // permissions and last write time are all already known to the caller.
+  // The permissions go into the symlink or non-symlink slot depending on __ft, and the cache
+  // kind is _Empty for file_type::none, otherwise one of the _IterCached* kinds.
+  _LIBCPP_HIDE_FROM_ABI static __cached_data __create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
+    const bool __is_symlink = __ft == file_type::symlink;
+    __cached_data __data;
+    __data.__type_       = __ft;
+    __data.__size_       = __size;
+    __data.__write_time_ = __write_time;
+    // Exactly one of the two permission slots is written.
+    (__is_symlink ? __data.__sym_perms_ : __data.__non_sym_perms_) = __perm;
+    // Pick the cache kind; every branch assigns it.
+    if (__ft == file_type::none)
+      __data.__cache_type_ = _Empty;
+    else if (__is_symlink)
+      __data.__cache_type_ = _IterCachedSymlink;
+    else
+      __data.__cache_type_ = _IterCachedNonSymlink;
+    return __data;
+  }
+
   _LIBCPP_HIDE_FROM_ABI void __assign_iter_entry(_Path&& __p, __cached_data __dt) {
     __p_    = std::move(__p);
     __data_ = __dt;
@@ -282,12 +306,14 @@ class directory_entry {
     case _Empty:
       return __symlink_status(__p_, __ec).type();
     case _IterSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlink:
     case _RefreshSymlinkUnresolved:
       if (__ec)
         __ec->clear();
       return file_type::symlink;
     case _IterNonSymlink:
+    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
       file_status __st(__data_.__type_);
       if (__ec && !filesystem::exists(__st))
@@ -303,9 +329,11 @@ class directory_entry {
     switch (__data_.__cache_type_) {
     case _Empty:
     case _IterSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return __status(__p_, __ec).type();
     case _IterNonSymlink:
+    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
     case _RefreshSymlink: {
       file_status __st(__data_.__type_);
@@ -324,8 +352,10 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return __status(__p_, __ec);
+    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
     case _RefreshSymlink:
       return file_status(__get_ft(__ec), __data_.__non_sym_perms_);
@@ -339,8 +369,10 @@ class directory_entry {
     case _IterNonSymlink:
     case _IterSymlink:
       return __symlink_status(__p_, __ec);
+    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
       return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_);
+    case _IterCachedSymlink:
     case _RefreshSymlink:
     case _RefreshSymlinkUnresolved:
       return file_status(__get_sym_ft(__ec), __data_.__sym_perms_);
@@ -353,8 +385,10 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return filesystem::__file_size(__p_, __ec);
+    case _IterCachedNonSymlink:
     case _RefreshSymlink:
     case _RefreshNonSymlink: {
       error_code __m_ec;
@@ -375,6 +409,8 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
+    case _IterCachedNonSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return filesystem::__hard_link_count(__p_, __ec);
     case _RefreshSymlink:
@@ -395,6 +431,8 @@ class directory_entry {
     case _IterSymlink:
     case _RefreshSymlinkUnresolved:
       return filesystem::__last_write_time(__p_, __ec);
+    case _IterCachedNonSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlink:
     case _RefreshNonSymlink: {
       error_code __m_ec;
diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp
index dceb3486279f8..d7ed9a358f559 100644
--- a/libcxx/src/filesystem/directory_iterator.cpp
+++ b/libcxx/src/filesystem/directory_iterator.cpp
@@ -77,13 +77,13 @@ class __dir_stream {
   bool assign() {
     if (!wcscmp(__data_.cFileName, L".") || !wcscmp(__data_.cFileName, L".."))
       return false;
-    // FIXME: Cache more of this
-    // directory_entry::__cached_data cdata;
-    // cdata.__type_ = get_file_type(__data_);
-    // cdata.__size_ = get_file_size(__data_);
-    // cdata.__write_time_ = get_write_time(__data_);
     __entry_.__assign_iter_entry(
-        __root_ / __data_.cFileName, directory_entry::__create_iter_result(detail::get_file_type(__data_)));
+        __root_ / __data_.cFileName, // entry path: __root_ joined with the name held in __data_
+        directory_entry::__create_iter_cached_result( // every argument below is derived from __data_
+            detail::get_file_type(__data_),
+            detail::get_file_size(__data_),
+            detail::get_file_perm(__data_),
+            detail::get_write_time(__data_)));
     return true;
   }
 
diff --git a/libcxx/src/filesystem/file_descriptor.h b/libcxx/src/filesystem/file_descriptor.h
index 50178ff84e03f..44a072b72e766 100644
--- a/libcxx/src/filesystem/file_descriptor.h
+++ b/libcxx/src/filesystem/file_descriptor.h
@@ -97,11 +97,17 @@ inline uintmax_t get_file_size(const WIN32_FIND_DATAW& data) {
   return (static_cast<uint64_t>(data.nFileSizeHigh) << 32) + data.nFileSizeLow;
 }
 inline file_time_type get_write_time(const WIN32_FIND_DATAW& data) {
-  ULARGE_INTEGER tmp;
-  const FILETIME& time = data.ftLastWriteTime;
-  tmp.u.LowPart        = time.dwLowDateTime;
-  tmp.u.HighPart       = time.dwHighDateTime;
-  return file_time_type(file_time_type::duration(tmp.QuadPart));
+  using detail::fs_time; // converts data.ftLastWriteTime to file_time_type via a timespec
+  auto ts = filetime_to_timespec(data.ftLastWriteTime); // the old `time` alias is removed above; read the field
+  if (!fs_time::is_representable(ts))
+    return file_time_type::min(); // value does not fit in file_time_type: report the minimum instead
+  return fs_time::convert_from_timespec(ts);
+}
+inline perms get_file_perm(const WIN32_FIND_DATAW& data) {
+  // Read and execute bits are always granted; write bits are added unless FILE_ATTRIBUTE_READONLY is set.
+  const bool read_only = (data.dwFileAttributes & FILE_ATTRIBUTE_READONLY) != 0;
+  const unsigned mode_bits = read_only ? 0555u : 0777u; // 0777 == 0555 | 0222
+  return static_cast<perms>(mode_bits) & perms::mask;
 }
 
 #endif // !_LIBCPP_WIN32API



More information about the libcxx-commits mailing list