[libcxx-commits] [libcxx] [libcxx] Cache file attributes during directory iteration. (PR #93316)

Eduard Satdarov via libcxx-commits libcxx-commits at lists.llvm.org
Sat Jul 20 07:56:18 PDT 2024


https://github.com/ed-sat updated https://github.com/llvm/llvm-project/pull/93316

>From 1fea8e10fc7a44001c71ee79adaf69207d4f71c4 Mon Sep 17 00:00:00 2001
From: Eduard Satdarov <sath at yandex-team.ru>
Date: Fri, 24 May 2024 16:41:35 +0300
Subject: [PATCH 1/5] [libcxx] Cache file attributes during directory
 iteration.

---
 libcxx/include/__filesystem/directory_entry.h | 38 +++++++++++++++++++
 libcxx/src/filesystem/directory_iterator.cpp  | 12 +++---
 libcxx/src/filesystem/file_descriptor.h       | 15 ++++++--
 3 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h
index 96d88dcd90b4c..0f62b869281a2 100644
--- a/libcxx/include/__filesystem/directory_entry.h
+++ b/libcxx/include/__filesystem/directory_entry.h
@@ -199,6 +199,8 @@ class directory_entry {
     _Empty,
     _IterSymlink,
     _IterNonSymlink,
+    _IterCachedSymlink,
+    _IterCachedNonSymlink,
     _RefreshSymlink,
     _RefreshSymlinkUnresolved,
     _RefreshNonSymlink
@@ -240,6 +242,28 @@ class directory_entry {
     return __data;
   }
 
+  _LIBCPP_HIDE_FROM_ABI static __cached_data __create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
+    __cached_data __data;
+    __data.__type_       = __ft;
+    __data.__size_       = __size;
+    __data.__write_time_ = __write_time;
+    if (__ft == file_type::symlink)
+      __data.__sym_perms_ = __perm;
+    else
+      __data.__non_sym_perms_ = __perm;
+    __data.__cache_type_ = [&]() {
+      switch (__ft) {
+      case file_type::none:
+        return _Empty;
+      case file_type::symlink:
+        return _IterCachedSymlink;
+      default:
+        return _IterCachedNonSymlink;
+      }
+    }();
+    return __data;
+  }
+
   _LIBCPP_HIDE_FROM_ABI void __assign_iter_entry(_Path&& __p, __cached_data __dt) {
     __p_    = std::move(__p);
     __data_ = __dt;
@@ -281,12 +305,14 @@ class directory_entry {
     case _Empty:
       return __symlink_status(__p_, __ec).type();
     case _IterSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlink:
     case _RefreshSymlinkUnresolved:
       if (__ec)
         __ec->clear();
       return file_type::symlink;
     case _IterNonSymlink:
+    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
       file_status __st(__data_.__type_);
       if (__ec && !filesystem::exists(__st))
@@ -302,9 +328,11 @@ class directory_entry {
     switch (__data_.__cache_type_) {
     case _Empty:
     case _IterSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return __status(__p_, __ec).type();
     case _IterNonSymlink:
+    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
     case _RefreshSymlink: {
       file_status __st(__data_.__type_);
@@ -323,8 +351,10 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return __status(__p_, __ec);
+    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
     case _RefreshSymlink:
       return file_status(__get_ft(__ec), __data_.__non_sym_perms_);
@@ -338,8 +368,10 @@ class directory_entry {
     case _IterNonSymlink:
     case _IterSymlink:
       return __symlink_status(__p_, __ec);
+    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
       return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_);
+    case _IterCachedSymlink:
     case _RefreshSymlink:
     case _RefreshSymlinkUnresolved:
       return file_status(__get_sym_ft(__ec), __data_.__sym_perms_);
@@ -352,8 +384,10 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return filesystem::__file_size(__p_, __ec);
+    case _IterCachedNonSymlink:
     case _RefreshSymlink:
     case _RefreshNonSymlink: {
       error_code __m_ec;
@@ -374,6 +408,8 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
+    case _IterCachedNonSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return filesystem::__hard_link_count(__p_, __ec);
     case _RefreshSymlink:
@@ -394,6 +430,8 @@ class directory_entry {
     case _IterSymlink:
     case _RefreshSymlinkUnresolved:
       return filesystem::__last_write_time(__p_, __ec);
+    case _IterCachedNonSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlink:
     case _RefreshNonSymlink: {
       error_code __m_ec;
diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp
index dceb3486279f8..d7ed9a358f559 100644
--- a/libcxx/src/filesystem/directory_iterator.cpp
+++ b/libcxx/src/filesystem/directory_iterator.cpp
@@ -77,13 +77,13 @@ class __dir_stream {
   bool assign() {
     if (!wcscmp(__data_.cFileName, L".") || !wcscmp(__data_.cFileName, L".."))
       return false;
-    // FIXME: Cache more of this
-    // directory_entry::__cached_data cdata;
-    // cdata.__type_ = get_file_type(__data_);
-    // cdata.__size_ = get_file_size(__data_);
-    // cdata.__write_time_ = get_write_time(__data_);
     __entry_.__assign_iter_entry(
-        __root_ / __data_.cFileName, directory_entry::__create_iter_result(detail::get_file_type(__data_)));
+        __root_ / __data_.cFileName,
+        directory_entry::__create_iter_cached_result(
+            detail::get_file_type(__data_),
+            detail::get_file_size(__data_),
+            detail::get_file_perm(__data_),
+            detail::get_write_time(__data_)));
     return true;
   }
 
diff --git a/libcxx/src/filesystem/file_descriptor.h b/libcxx/src/filesystem/file_descriptor.h
index 50178ff84e03f..36b0b853b4dc6 100644
--- a/libcxx/src/filesystem/file_descriptor.h
+++ b/libcxx/src/filesystem/file_descriptor.h
@@ -97,11 +97,18 @@ inline uintmax_t get_file_size(const WIN32_FIND_DATAW& data) {
   return (static_cast<uint64_t>(data.nFileSizeHigh) << 32) + data.nFileSizeLow;
 }
 inline file_time_type get_write_time(const WIN32_FIND_DATAW& data) {
-  ULARGE_INTEGER tmp;
+  using detail::fs_time;
   const FILETIME& time = data.ftLastWriteTime;
-  tmp.u.LowPart        = time.dwLowDateTime;
-  tmp.u.HighPart       = time.dwHighDateTime;
-  return file_time_type(file_time_type::duration(tmp.QuadPart));
+  auto ts = filetime_to_timespec(time);
+  if (!fs_time::is_representable(ts))
+    return file_time_type::min();
+  return fs_time::convert_from_timespec(ts);
+}
+inline perms get_file_perm(const WIN32_FIND_DATAW& data) {
+  unsigned st_mode = 0555; // Read-only
+  if (!(data.dwFileAttributes & FILE_ATTRIBUTE_READONLY))
+    st_mode |= 0222; // Write
+  return static_cast<perms>(st_mode) & perms::mask;
 }
 
 #endif // !_LIBCPP_WIN32API

>From 6e6cb8ebd8a223788995c906e02ac3aafcf3c62c Mon Sep 17 00:00:00 2001
From: Eduard Satdarov <sath at yandex-team.ru>
Date: Thu, 30 May 2024 19:43:17 +0300
Subject: [PATCH 2/5] Test, formatting and fixes.

---
 libcxx/include/__filesystem/directory_entry.h |   5 +-
 .../cache_refresh_iter.pass.cpp               | 101 ++++++++++++++++++
 2 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp

diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h
index 0f62b869281a2..de02f5360719a 100644
--- a/libcxx/include/__filesystem/directory_entry.h
+++ b/libcxx/include/__filesystem/directory_entry.h
@@ -242,7 +242,8 @@ class directory_entry {
     return __data;
   }
 
-  _LIBCPP_HIDE_FROM_ABI static __cached_data __create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
+  _LIBCPP_HIDE_FROM_ABI static __cached_data
+  __create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
     __cached_data __data;
     __data.__type_       = __ft;
     __data.__size_       = __size;
@@ -428,10 +429,10 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return filesystem::__last_write_time(__p_, __ec);
     case _IterCachedNonSymlink:
-    case _IterCachedSymlink:
     case _RefreshSymlink:
     case _RefreshNonSymlink: {
       error_code __m_ec;
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
new file mode 100644
index 0000000000000..b404bf6b95d6f
--- /dev/null
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
@@ -0,0 +1,101 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: can-create-symlinks
+// UNSUPPORTED: c++03, c++11, c++14
+// UNSUPPORTED: no-filesystem
+// UNSUPPORTED: availability-filesystem-missing
+
+// <filesystem>
+
+// recursive_directory_iterator
+
+#include <filesystem>
+#include <type_traits>
+#include <set>
+#include <cassert>
+
+#include "test_macros.h"
+#include "filesystem_test_helper.h"
+namespace fs = std::filesystem;
+using namespace fs;
+
+#if defined(_WIN32)
+static void set_last_write_time_in_iteration(const fs::path& dir)
+{
+    // Windows can postpone updating last write time for file especially for
+    // directory because last write time of directory depends of its childs.
+    // See
+    // https://learn.microsoft.com/en-us/windows/win32/sysinfo/file-times
+    // To force updating file entries calls "last_write_time" with own value.
+    const recursive_directory_iterator endIt{};
+
+    std::error_code ec;
+    recursive_directory_iterator it(dir, ec);
+    assert(!ec);
+
+    file_time_type now_time = file_time_type::clock::now();
+    for ( ; it != endIt; ++it) {
+        const path entry = *it;
+        last_write_time(entry, now_time, ec);
+        assert(!ec);
+    }
+
+    assert(it == endIt);
+}
+#endif 
+
+static void test_cache_and_refresh_in_iteration()
+{
+    static_test_env static_env;
+    const path testDir = static_env.Dir;
+#if defined(_WIN32)    
+    set_last_write_time_in_iteration(testDir);
+#endif 
+    const std::set<path> dir_contents(static_env.RecDirIterationList.begin(),
+                                      static_env.RecDirIterationList.end());
+    const recursive_directory_iterator endIt{};
+
+    std::error_code ec;
+    recursive_directory_iterator it(testDir, ec);
+    assert(!ec);
+
+    std::set<path> unseen_entries = dir_contents;
+    while (!unseen_entries.empty()) {
+        assert(it != endIt);
+        const directory_entry& entry = *it;
+
+        assert(unseen_entries.erase(entry.path()) == 1);
+
+        file_status symlink_status     = entry.symlink_status();
+        file_status status             = entry.status();
+        std::uintmax_t file_size       = entry.is_regular_file() ? entry.file_size() : 0;
+        file_time_type last_write_time = entry.last_write_time();
+
+        directory_entry mutable_entry = *it;
+        mutable_entry.refresh();
+        file_status upd_symlink_status     = mutable_entry.symlink_status();
+        file_status upd_status             = mutable_entry.status();
+        std::uintmax_t upd_file_size       = mutable_entry.is_regular_file() ? mutable_entry.file_size() : 0;
+        file_time_type upd_last_write_time = mutable_entry.last_write_time();
+        assert(upd_symlink_status == symlink_status);
+        assert(upd_status == status);
+        assert(upd_file_size == file_size);
+        assert(upd_last_write_time == last_write_time);
+
+        recursive_directory_iterator& it_ref = it.increment(ec);
+        assert(!ec);
+        assert(&it_ref == &it);
+    }
+}
+
+int main(int, char**) {
+    test_cache_and_refresh_in_iteration();
+
+    return 0;
+}

>From 69a00a0a7cfc1ca0f102c5f7c1b136525084168c Mon Sep 17 00:00:00 2001
From: Eduard Satdarov <sath at yandex-team.ru>
Date: Mon, 3 Jun 2024 12:54:33 +0300
Subject: [PATCH 3/5] [libcxx] Format code & fix test compilation for c++17.

---
 .../cache_refresh_iter.pass.cpp               | 126 +++++++++---------
 1 file changed, 62 insertions(+), 64 deletions(-)

diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
index b404bf6b95d6f..7ae4d792b019d 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
@@ -26,76 +26,74 @@ namespace fs = std::filesystem;
 using namespace fs;
 
 #if defined(_WIN32)
-static void set_last_write_time_in_iteration(const fs::path& dir)
-{
-    // Windows can postpone updating last write time for file especially for
-    // directory because last write time of directory depends of its childs.
-    // See
-    // https://learn.microsoft.com/en-us/windows/win32/sysinfo/file-times
-    // To force updating file entries calls "last_write_time" with own value.
-    const recursive_directory_iterator endIt{};
-
-    std::error_code ec;
-    recursive_directory_iterator it(dir, ec);
+static void set_last_write_time_in_iteration(const fs::path& dir) {
+  // Windows can postpone updating last write time for file especially for
+  // directory because last write time of directory depends of its childs.
+  // See
+  // https://learn.microsoft.com/en-us/windows/win32/sysinfo/file-times
+  // To force updating file entries calls "last_write_time" with own value.
+  const recursive_directory_iterator endIt{};
+
+  std::error_code ec;
+  recursive_directory_iterator it(dir, ec);
+  assert(!ec);
+
+  file_time_type now_time = file_time_type::clock::now();
+  for (; it != endIt; ++it) {
+    const path entry = *it;
+    last_write_time(entry, now_time, ec);
     assert(!ec);
+  }
 
-    file_time_type now_time = file_time_type::clock::now();
-    for ( ; it != endIt; ++it) {
-        const path entry = *it;
-        last_write_time(entry, now_time, ec);
-        assert(!ec);
-    }
-
-    assert(it == endIt);
+  assert(it == endIt);
 }
-#endif 
-
-static void test_cache_and_refresh_in_iteration()
-{
-    static_test_env static_env;
-    const path testDir = static_env.Dir;
-#if defined(_WIN32)    
-    set_last_write_time_in_iteration(testDir);
-#endif 
-    const std::set<path> dir_contents(static_env.RecDirIterationList.begin(),
-                                      static_env.RecDirIterationList.end());
-    const recursive_directory_iterator endIt{};
-
-    std::error_code ec;
-    recursive_directory_iterator it(testDir, ec);
-    assert(!ec);
+#endif
 
-    std::set<path> unseen_entries = dir_contents;
-    while (!unseen_entries.empty()) {
-        assert(it != endIt);
-        const directory_entry& entry = *it;
-
-        assert(unseen_entries.erase(entry.path()) == 1);
-
-        file_status symlink_status     = entry.symlink_status();
-        file_status status             = entry.status();
-        std::uintmax_t file_size       = entry.is_regular_file() ? entry.file_size() : 0;
-        file_time_type last_write_time = entry.last_write_time();
-
-        directory_entry mutable_entry = *it;
-        mutable_entry.refresh();
-        file_status upd_symlink_status     = mutable_entry.symlink_status();
-        file_status upd_status             = mutable_entry.status();
-        std::uintmax_t upd_file_size       = mutable_entry.is_regular_file() ? mutable_entry.file_size() : 0;
-        file_time_type upd_last_write_time = mutable_entry.last_write_time();
-        assert(upd_symlink_status == symlink_status);
-        assert(upd_status == status);
-        assert(upd_file_size == file_size);
-        assert(upd_last_write_time == last_write_time);
-
-        recursive_directory_iterator& it_ref = it.increment(ec);
-        assert(!ec);
-        assert(&it_ref == &it);
-    }
+static void test_cache_and_refresh_in_iteration() {
+  static_test_env static_env;
+  const path testDir = static_env.Dir;
+#if defined(_WIN32)
+  set_last_write_time_in_iteration(testDir);
+#endif
+  const std::set<path> dir_contents(static_env.RecDirIterationList.begin(), static_env.RecDirIterationList.end());
+  const recursive_directory_iterator endIt{};
+
+  std::error_code ec;
+  recursive_directory_iterator it(testDir, ec);
+  assert(!ec);
+
+  std::set<path> unseen_entries = dir_contents;
+  while (!unseen_entries.empty()) {
+    assert(it != endIt);
+    const directory_entry& entry = *it;
+
+    assert(unseen_entries.erase(entry.path()) == 1);
+
+    file_status symlink_status     = entry.symlink_status();
+    file_status status             = entry.status();
+    std::uintmax_t file_size       = entry.is_regular_file() ? entry.file_size() : 0;
+    file_time_type last_write_time = entry.last_write_time();
+
+    directory_entry mutable_entry = *it;
+    mutable_entry.refresh();
+    file_status upd_symlink_status     = mutable_entry.symlink_status();
+    file_status upd_status             = mutable_entry.status();
+    std::uintmax_t upd_file_size       = mutable_entry.is_regular_file() ? mutable_entry.file_size() : 0;
+    file_time_type upd_last_write_time = mutable_entry.last_write_time();
+    assert(upd_symlink_status.type() == symlink_status.type() &&
+           upd_symlink_status.permissions() == symlink_status.permissions());
+    assert(upd_status.type() == status.type() && upd_status.permissions() == status.permissions());
+    assert(upd_file_size == file_size);
+    assert(upd_last_write_time == last_write_time);
+
+    recursive_directory_iterator& it_ref = it.increment(ec);
+    assert(!ec);
+    assert(&it_ref == &it);
+  }
 }
 
 int main(int, char**) {
-    test_cache_and_refresh_in_iteration();
+  test_cache_and_refresh_in_iteration();
 
-    return 0;
+  return 0;
 }

>From d8211d9a5333296a6959e0c84e7afb1aa271821a Mon Sep 17 00:00:00 2001
From: Eduard Satdarov <sath at yandex-team.ru>
Date: Mon, 24 Jun 2024 19:42:12 +0300
Subject: [PATCH 4/5] [libcxx] Add availability for populating cache from
 directory_iterator.

---
 libcxx/include/__configuration/availability.h |  6 +++
 libcxx/include/__filesystem/directory_entry.h | 54 ++++++++++++++-----
 libcxx/src/filesystem/directory_iterator.cpp  |  7 ++-
 3 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h
index 7e6e18b22d93a..e75bb66eb02cb 100644
--- a/libcxx/include/__configuration/availability.h
+++ b/libcxx/include/__configuration/availability.h
@@ -353,6 +353,12 @@
 #define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19
 #define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE
 
+// This controls the availability of populating cache for directory entry
+// filesystem system. A directory_iterator builds cache for directory_entry
+// in the library.
+#define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY _LIBCPP_INTRODUCED_IN_LLVM_19
+#define _LIBCPP_AVAILABILITY_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE
+
 // Define availability attributes that depend on _LIBCPP_HAS_NO_EXCEPTIONS.
 // Those are defined in terms of the availability attributes above, and
 // should not be vendor-specific.
diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h
index de02f5360719a..9b293a4425806 100644
--- a/libcxx/include/__filesystem/directory_entry.h
+++ b/libcxx/include/__filesystem/directory_entry.h
@@ -199,11 +199,13 @@ class directory_entry {
     _Empty,
     _IterSymlink,
     _IterNonSymlink,
-    _IterCachedSymlink,
-    _IterCachedNonSymlink,
     _RefreshSymlink,
     _RefreshSymlinkUnresolved,
-    _RefreshNonSymlink
+    _RefreshNonSymlink,
+#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
+    _IterCachedSymlink,
+    _IterCachedNonSymlink
+#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
   };
 
   struct __cached_data {
@@ -242,6 +244,7 @@ class directory_entry {
     return __data;
   }
 
+  #  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
   _LIBCPP_HIDE_FROM_ABI static __cached_data
   __create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
     __cached_data __data;
@@ -264,6 +267,7 @@ class directory_entry {
     }();
     return __data;
   }
+  #  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
 
   _LIBCPP_HIDE_FROM_ABI void __assign_iter_entry(_Path&& __p, __cached_data __dt) {
     __p_    = std::move(__p);
@@ -306,15 +310,19 @@ class directory_entry {
     case _Empty:
       return __symlink_status(__p_, __ec).type();
     case _IterSymlink:
+#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterCachedSymlink:
+#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _RefreshSymlink:
     case _RefreshSymlinkUnresolved:
       if (__ec)
         __ec->clear();
       return file_type::symlink;
-    case _IterNonSymlink:
+#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterCachedNonSymlink:
-    case _RefreshNonSymlink:
+#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
+    case _IterNonSymlink:
+    case _RefreshNonSymlink: {
       file_status __st(__data_.__type_);
       if (__ec && !filesystem::exists(__st))
         *__ec = make_error_code(errc::no_such_file_or_directory);
@@ -322,6 +330,7 @@ class directory_entry {
         __ec->clear();
       return __data_.__type_;
     }
+    }
     __libcpp_unreachable();
   }
 
@@ -329,11 +338,14 @@ class directory_entry {
     switch (__data_.__cache_type_) {
     case _Empty:
     case _IterSymlink:
-    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return __status(__p_, __ec).type();
-    case _IterNonSymlink:
+#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
+    case _IterCachedSymlink:
+      return __status(__p_, __ec).type();
     case _IterCachedNonSymlink:
+#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
+    case _IterNonSymlink:
     case _RefreshNonSymlink:
     case _RefreshSymlink: {
       file_status __st(__data_.__type_);
@@ -352,13 +364,17 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
-    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return __status(__p_, __ec);
-    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
     case _RefreshSymlink:
       return file_status(__get_ft(__ec), __data_.__non_sym_perms_);
+#    if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
+    case _IterCachedSymlink:
+      return __status(__p_, __ec);
+    case _IterCachedNonSymlink:
+      return file_status(__get_ft(__ec), __data_.__non_sym_perms_);
+#    endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     }
     __libcpp_unreachable();
   }
@@ -369,13 +385,17 @@ class directory_entry {
     case _IterNonSymlink:
     case _IterSymlink:
       return __symlink_status(__p_, __ec);
-    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
       return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_);
-    case _IterCachedSymlink:
     case _RefreshSymlink:
     case _RefreshSymlinkUnresolved:
       return file_status(__get_sym_ft(__ec), __data_.__sym_perms_);
+#    if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
+    case _IterCachedNonSymlink:
+      return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_);
+    case _IterCachedSymlink:
+      return file_status(__get_sym_ft(__ec), __data_.__sym_perms_);
+#    endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     }
     __libcpp_unreachable();
   }
@@ -385,10 +405,13 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
-    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return filesystem::__file_size(__p_, __ec);
+#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
+    case _IterCachedSymlink:
+      return filesystem::__file_size(__p_, __ec);
     case _IterCachedNonSymlink:
+#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _RefreshSymlink:
     case _RefreshNonSymlink: {
       error_code __m_ec;
@@ -409,8 +432,10 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
+#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterCachedNonSymlink:
     case _IterCachedSymlink:
+#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _RefreshSymlinkUnresolved:
       return filesystem::__hard_link_count(__p_, __ec);
     case _RefreshSymlink:
@@ -429,10 +454,13 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
-    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return filesystem::__last_write_time(__p_, __ec);
+#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
+    case _IterCachedSymlink:
+      return filesystem::__last_write_time(__p_, __ec);
     case _IterCachedNonSymlink:
+#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _RefreshSymlink:
     case _RefreshNonSymlink: {
       error_code __m_ec;
diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp
index d7ed9a358f559..9798d2e57607c 100644
--- a/libcxx/src/filesystem/directory_iterator.cpp
+++ b/libcxx/src/filesystem/directory_iterator.cpp
@@ -79,11 +79,16 @@ class __dir_stream {
       return false;
     __entry_.__assign_iter_entry(
         __root_ / __data_.cFileName,
+#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
         directory_entry::__create_iter_cached_result(
             detail::get_file_type(__data_),
             detail::get_file_size(__data_),
             detail::get_file_perm(__data_),
-            detail::get_write_time(__data_)));
+            detail::get_write_time(__data_))
+#  else
+        directory_entry::__create_iter_result(detail::get_file_type(__data_))
+#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
+    );
     return true;
   }
 

>From 89c5d7ebc29bdd338da62205bec012bf9bd5bc18 Mon Sep 17 00:00:00 2001
From: Eduard Satdarov <sath at yandex-team.ru>
Date: Fri, 19 Jul 2024 17:08:56 +0300
Subject: [PATCH 5/5] [libcxx] Fixes after review, add & improve test.

---
 libcxx/include/__configuration/availability.h |   7 +-
 libcxx/include/__filesystem/directory_entry.h |  41 ++-----
 libcxx/src/filesystem/directory_iterator.cpp  |   7 +-
 libcxx/src/filesystem/file_descriptor.h       |   2 +-
 .../cache_refresh_iter.pass.cpp               | 104 ++++++++++++++++--
 5 files changed, 109 insertions(+), 52 deletions(-)

diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h
index e75bb66eb02cb..77852f8e86af3 100644
--- a/libcxx/include/__configuration/availability.h
+++ b/libcxx/include/__configuration/availability.h
@@ -353,9 +353,10 @@
 #define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19
 #define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE
 
-// This controls the availability of populating cache for directory entry
-// filesystem system. A directory_iterator builds cache for directory_entry
-// in the library.
+// This determines whether std::filesystem::directory_entry caches all the properties
+// it contains, which improves the performance when accessing a directory entry.
+// Code in the headers like filesystem::directory_entry's implementation needs to
+// know whether the dylib populated the cache or not.
 #define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY _LIBCPP_INTRODUCED_IN_LLVM_19
 #define _LIBCPP_AVAILABILITY_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE
 
diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h
index 9b293a4425806..c40b5d72338f1 100644
--- a/libcxx/include/__filesystem/directory_entry.h
+++ b/libcxx/include/__filesystem/directory_entry.h
@@ -202,10 +202,8 @@ class directory_entry {
     _RefreshSymlink,
     _RefreshSymlinkUnresolved,
     _RefreshNonSymlink,
-#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     _IterCachedSymlink,
     _IterCachedNonSymlink
-#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
   };
 
   struct __cached_data {
@@ -244,7 +242,6 @@ class directory_entry {
     return __data;
   }
 
-  #  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
   _LIBCPP_HIDE_FROM_ABI static __cached_data
   __create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
     __cached_data __data;
@@ -267,7 +264,6 @@ class directory_entry {
     }();
     return __data;
   }
-  #  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
 
   _LIBCPP_HIDE_FROM_ABI void __assign_iter_entry(_Path&& __p, __cached_data __dt) {
     __p_    = std::move(__p);
@@ -310,17 +306,13 @@ class directory_entry {
     case _Empty:
       return __symlink_status(__p_, __ec).type();
     case _IterSymlink:
-#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterCachedSymlink:
-#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _RefreshSymlink:
     case _RefreshSymlinkUnresolved:
       if (__ec)
         __ec->clear();
       return file_type::symlink;
-#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterCachedNonSymlink:
-#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterNonSymlink:
     case _RefreshNonSymlink: {
       file_status __st(__data_.__type_);
@@ -338,13 +330,10 @@ class directory_entry {
     switch (__data_.__cache_type_) {
     case _Empty:
     case _IterSymlink:
-    case _RefreshSymlinkUnresolved:
-      return __status(__p_, __ec).type();
-#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterCachedSymlink:
+    case _RefreshSymlinkUnresolved:
       return __status(__p_, __ec).type();
     case _IterCachedNonSymlink:
-#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterNonSymlink:
     case _RefreshNonSymlink:
     case _RefreshSymlink: {
@@ -364,17 +353,13 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
+    case _IterCachedSymlink:
     case _RefreshSymlinkUnresolved:
       return __status(__p_, __ec);
+    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
     case _RefreshSymlink:
       return file_status(__get_ft(__ec), __data_.__non_sym_perms_);
-#    if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
-    case _IterCachedSymlink:
-      return __status(__p_, __ec);
-    case _IterCachedNonSymlink:
-      return file_status(__get_ft(__ec), __data_.__non_sym_perms_);
-#    endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     }
     __libcpp_unreachable();
   }
@@ -385,17 +370,13 @@ class directory_entry {
     case _IterNonSymlink:
     case _IterSymlink:
       return __symlink_status(__p_, __ec);
+    case _IterCachedNonSymlink:
     case _RefreshNonSymlink:
       return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_);
+    case _IterCachedSymlink:
     case _RefreshSymlink:
     case _RefreshSymlinkUnresolved:
       return file_status(__get_sym_ft(__ec), __data_.__sym_perms_);
-#    if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
-    case _IterCachedNonSymlink:
-      return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_);
-    case _IterCachedSymlink:
-      return file_status(__get_sym_ft(__ec), __data_.__sym_perms_);
-#    endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     }
     __libcpp_unreachable();
   }
@@ -405,13 +386,10 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
-    case _RefreshSymlinkUnresolved:
-      return filesystem::__file_size(__p_, __ec);
-#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterCachedSymlink:
+    case _RefreshSymlinkUnresolved:
       return filesystem::__file_size(__p_, __ec);
     case _IterCachedNonSymlink:
-#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _RefreshSymlink:
     case _RefreshNonSymlink: {
       error_code __m_ec;
@@ -432,10 +410,8 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
-#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterCachedNonSymlink:
     case _IterCachedSymlink:
-#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _RefreshSymlinkUnresolved:
       return filesystem::__hard_link_count(__p_, __ec);
     case _RefreshSymlink:
@@ -454,13 +430,10 @@ class directory_entry {
     case _Empty:
     case _IterNonSymlink:
     case _IterSymlink:
-    case _RefreshSymlinkUnresolved:
-      return filesystem::__last_write_time(__p_, __ec);
-#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _IterCachedSymlink:
+    case _RefreshSymlinkUnresolved:
       return filesystem::__last_write_time(__p_, __ec);
     case _IterCachedNonSymlink:
-#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
     case _RefreshSymlink:
     case _RefreshNonSymlink: {
       error_code __m_ec;
diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp
index 9798d2e57607c..d7ed9a358f559 100644
--- a/libcxx/src/filesystem/directory_iterator.cpp
+++ b/libcxx/src/filesystem/directory_iterator.cpp
@@ -79,16 +79,11 @@ class __dir_stream {
       return false;
     __entry_.__assign_iter_entry(
         __root_ / __data_.cFileName,
-#  if _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
         directory_entry::__create_iter_cached_result(
             detail::get_file_type(__data_),
             detail::get_file_size(__data_),
             detail::get_file_perm(__data_),
-            detail::get_write_time(__data_))
-#  else
-        directory_entry::__create_iter_result(detail::get_file_type(__data_))
-#  endif // _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_FULLY_POPULATED_CACHED_ENTRY
-    );
+            detail::get_write_time(__data_)));
     return true;
   }
 
diff --git a/libcxx/src/filesystem/file_descriptor.h b/libcxx/src/filesystem/file_descriptor.h
index 36b0b853b4dc6..2c9e0d7f4772b 100644
--- a/libcxx/src/filesystem/file_descriptor.h
+++ b/libcxx/src/filesystem/file_descriptor.h
@@ -99,7 +99,7 @@ inline uintmax_t get_file_size(const WIN32_FIND_DATAW& data) {
 inline file_time_type get_write_time(const WIN32_FIND_DATAW& data) {
   using detail::fs_time;
   const FILETIME& time = data.ftLastWriteTime;
-  auto ts = filetime_to_timespec(time);
+  auto ts              = filetime_to_timespec(time);
   if (!fs_time::is_representable(ts))
     return file_time_type::min();
   return fs_time::convert_from_timespec(ts);
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
index 7ae4d792b019d..1220248c5d913 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
@@ -32,39 +32,81 @@ static void set_last_write_time_in_iteration(const fs::path& dir) {
   // See
   // https://learn.microsoft.com/en-us/windows/win32/sysinfo/file-times
   // To force updating file entries calls "last_write_time" with own value.
-  const recursive_directory_iterator endIt{};
+  const recursive_directory_iterator end_it{};
 
   std::error_code ec;
   recursive_directory_iterator it(dir, ec);
   assert(!ec);
 
   file_time_type now_time = file_time_type::clock::now();
-  for (; it != endIt; ++it) {
+  for (; it != end_it; ++it) {
     const path entry = *it;
     last_write_time(entry, now_time, ec);
     assert(!ec);
   }
 
-  assert(it == endIt);
+  assert(it == end_it);
+}
+
+struct directory_entry_and_values {
+  directory_entry entry;
+
+  file_status symlink_status;
+  file_status status;
+  std::uintmax_t file_size;
+  file_time_type last_write_time;
+};
+
+std::vector<directory_entry_and_values> get_directory_entries_for(const path& dir, const std::set<path>& dir_contents) {
+  const recursive_directory_iterator end_it{};
+
+  std::error_code ec;
+  recursive_directory_iterator it(dir, ec);
+  assert(!ec);
+
+  std::vector<directory_entry_and_values> dir_entries;
+  std::set<path> unseen_entries = dir_contents;
+  while (!unseen_entries.empty()) {
+    assert(it != end_it);
+    const directory_entry& entry = *it;
+
+    assert(unseen_entries.erase(entry.path()) == 1);
+
+    dir_entries.push_back(directory_entry_and_values{
+        .entry           = entry,
+        .symlink_status  = entry.symlink_status(),
+        .status          = entry.status(),
+        .file_size       = entry.is_regular_file() ? entry.file_size() : 0,
+        .last_write_time = entry.last_write_time()});
+
+    recursive_directory_iterator& it_ref = it.increment(ec);
+    assert(!ec);
+    assert(&it_ref == &it);
+  }
+  return dir_entries;
 }
 #endif
 
+// Checks that the directory_entry properties will be the same before and after
+// calling "refresh" in case of iteration.
+// In case of Windows expects that directory_entry caches the properties during
+// iteration.
 static void test_cache_and_refresh_in_iteration() {
   static_test_env static_env;
-  const path testDir = static_env.Dir;
+  const path test_dir = static_env.Dir;
 #if defined(_WIN32)
-  set_last_write_time_in_iteration(testDir);
+  set_last_write_time_in_iteration(test_dir);
 #endif
   const std::set<path> dir_contents(static_env.RecDirIterationList.begin(), static_env.RecDirIterationList.end());
-  const recursive_directory_iterator endIt{};
+  const recursive_directory_iterator end_it{};
 
   std::error_code ec;
-  recursive_directory_iterator it(testDir, ec);
+  recursive_directory_iterator it(test_dir, ec);
   assert(!ec);
 
   std::set<path> unseen_entries = dir_contents;
   while (!unseen_entries.empty()) {
-    assert(it != endIt);
+    assert(it != end_it);
     const directory_entry& entry = *it;
 
     assert(unseen_entries.erase(entry.path()) == 1);
@@ -92,8 +134,54 @@ static void test_cache_and_refresh_in_iteration() {
   }
 }
 
+#if defined(_WIN32)
+// In case of Windows expects that the directory_entry caches the properties
+// during iteration and the properties don't change after deleting folders
+// and files.
+static void test_cached_values_in_iteration() {
+  std::vector<directory_entry_and_values> dir_entries;
+  {
+    static_test_env static_env;
+    const path testDir = static_env.Dir;
+    set_last_write_time_in_iteration(testDir);
+    const std::set<path> dir_contents(static_env.RecDirIterationList.begin(), static_env.RecDirIterationList.end());
+    dir_entries = get_directory_entries_for(testDir, dir_contents);
+  }
+  // Testing folder should be deleted after destoying static_test_env.
+
+  for (const auto& dir_entry : dir_entries) {
+    // During iteration Windows provides information only about symlink itself
+    // not about file/folder which symlink points to.
+    if (dir_entry.entry.is_symlink()) {
+      // Check that symlink is not using cached value about existing file.
+      assert(!dir_entry.entry.exists());
+    } else {
+      // Check that entry uses cached value about existing file.
+      assert(dir_entry.entry.exists());
+    }
+    file_status symlink_status = dir_entry.entry.symlink_status();
+    assert(dir_entry.symlink_status.type() == symlink_status.type() &&
+           dir_entry.symlink_status.permissions() == symlink_status.permissions());
+
+    if (!dir_entry.entry.is_symlink()) {
+      file_status status = dir_entry.entry.status();
+      assert(dir_entry.status.type() == status.type() && dir_entry.status.permissions() == status.permissions());
+
+      std::uintmax_t file_size = dir_entry.entry.is_regular_file() ? dir_entry.entry.file_size() : 0;
+      assert(dir_entry.file_size == file_size);
+
+      file_time_type last_write_time = dir_entry.entry.last_write_time();
+      assert(dir_entry.last_write_time == last_write_time);
+    }
+  }
+}
+#endif
+
 int main(int, char**) {
   test_cache_and_refresh_in_iteration();
+#if defined(_WIN32)
+  test_cached_values_in_iteration();
+#endif
 
   return 0;
 }



More information about the libcxx-commits mailing list