[libcxx-commits] [libcxx] [libcxx] Cache file attributes during directory iteration. (PR #93316)
Eduard Satdarov via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Jun 3 03:08:21 PDT 2024
https://github.com/ed-sat updated https://github.com/llvm/llvm-project/pull/93316
>From 33dabb74c21321f28f95933ead2bc3ab6e1f6293 Mon Sep 17 00:00:00 2001
From: Eduard Satdarov <sath at yandex-team.ru>
Date: Fri, 24 May 2024 16:41:35 +0300
Subject: [PATCH 1/3] [libcxx] Cache file attributes during directory
iteration.
---
libcxx/include/__filesystem/directory_entry.h | 38 +++++++++++++++++++
libcxx/src/filesystem/directory_iterator.cpp | 12 +++---
libcxx/src/filesystem/file_descriptor.h | 15 ++++++--
3 files changed, 55 insertions(+), 10 deletions(-)
diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h
index 016ad94a853dc..6d35d92ebf483 100644
--- a/libcxx/include/__filesystem/directory_entry.h
+++ b/libcxx/include/__filesystem/directory_entry.h
@@ -200,6 +200,8 @@ class directory_entry {
_Empty,
_IterSymlink,
_IterNonSymlink,
+ _IterCachedSymlink,
+ _IterCachedNonSymlink,
_RefreshSymlink,
_RefreshSymlinkUnresolved,
_RefreshNonSymlink
@@ -241,6 +243,28 @@ class directory_entry {
return __data;
}
+ _LIBCPP_HIDE_FROM_ABI static __cached_data __create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
+ __cached_data __data;
+ __data.__type_ = __ft;
+ __data.__size_ = __size;
+ __data.__write_time_ = __write_time;
+ if (__ft == file_type::symlink)
+ __data.__sym_perms_ = __perm;
+ else
+ __data.__non_sym_perms_ = __perm;
+ __data.__cache_type_ = [&]() {
+ switch (__ft) {
+ case file_type::none:
+ return _Empty;
+ case file_type::symlink:
+ return _IterCachedSymlink;
+ default:
+ return _IterCachedNonSymlink;
+ }
+ }();
+ return __data;
+ }
+
_LIBCPP_HIDE_FROM_ABI void __assign_iter_entry(_Path&& __p, __cached_data __dt) {
__p_ = std::move(__p);
__data_ = __dt;
@@ -282,12 +306,14 @@ class directory_entry {
case _Empty:
return __symlink_status(__p_, __ec).type();
case _IterSymlink:
+ case _IterCachedSymlink:
case _RefreshSymlink:
case _RefreshSymlinkUnresolved:
if (__ec)
__ec->clear();
return file_type::symlink;
case _IterNonSymlink:
+ case _IterCachedNonSymlink:
case _RefreshNonSymlink:
file_status __st(__data_.__type_);
if (__ec && !filesystem::exists(__st))
@@ -303,9 +329,11 @@ class directory_entry {
switch (__data_.__cache_type_) {
case _Empty:
case _IterSymlink:
+ case _IterCachedSymlink:
case _RefreshSymlinkUnresolved:
return __status(__p_, __ec).type();
case _IterNonSymlink:
+ case _IterCachedNonSymlink:
case _RefreshNonSymlink:
case _RefreshSymlink: {
file_status __st(__data_.__type_);
@@ -324,8 +352,10 @@ class directory_entry {
case _Empty:
case _IterNonSymlink:
case _IterSymlink:
+ case _IterCachedSymlink:
case _RefreshSymlinkUnresolved:
return __status(__p_, __ec);
+ case _IterCachedNonSymlink:
case _RefreshNonSymlink:
case _RefreshSymlink:
return file_status(__get_ft(__ec), __data_.__non_sym_perms_);
@@ -339,8 +369,10 @@ class directory_entry {
case _IterNonSymlink:
case _IterSymlink:
return __symlink_status(__p_, __ec);
+ case _IterCachedNonSymlink:
case _RefreshNonSymlink:
return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_);
+ case _IterCachedSymlink:
case _RefreshSymlink:
case _RefreshSymlinkUnresolved:
return file_status(__get_sym_ft(__ec), __data_.__sym_perms_);
@@ -353,8 +385,10 @@ class directory_entry {
case _Empty:
case _IterNonSymlink:
case _IterSymlink:
+ case _IterCachedSymlink:
case _RefreshSymlinkUnresolved:
return filesystem::__file_size(__p_, __ec);
+ case _IterCachedNonSymlink:
case _RefreshSymlink:
case _RefreshNonSymlink: {
error_code __m_ec;
@@ -375,6 +409,8 @@ class directory_entry {
case _Empty:
case _IterNonSymlink:
case _IterSymlink:
+ case _IterCachedNonSymlink:
+ case _IterCachedSymlink:
case _RefreshSymlinkUnresolved:
return filesystem::__hard_link_count(__p_, __ec);
case _RefreshSymlink:
@@ -395,6 +431,8 @@ class directory_entry {
case _IterSymlink:
case _RefreshSymlinkUnresolved:
return filesystem::__last_write_time(__p_, __ec);
+ case _IterCachedNonSymlink:
+ case _IterCachedSymlink:
case _RefreshSymlink:
case _RefreshNonSymlink: {
error_code __m_ec;
diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp
index dceb3486279f8..d7ed9a358f559 100644
--- a/libcxx/src/filesystem/directory_iterator.cpp
+++ b/libcxx/src/filesystem/directory_iterator.cpp
@@ -77,13 +77,13 @@ class __dir_stream {
bool assign() {
if (!wcscmp(__data_.cFileName, L".") || !wcscmp(__data_.cFileName, L".."))
return false;
- // FIXME: Cache more of this
- // directory_entry::__cached_data cdata;
- // cdata.__type_ = get_file_type(__data_);
- // cdata.__size_ = get_file_size(__data_);
- // cdata.__write_time_ = get_write_time(__data_);
__entry_.__assign_iter_entry(
- __root_ / __data_.cFileName, directory_entry::__create_iter_result(detail::get_file_type(__data_)));
+ __root_ / __data_.cFileName,
+ directory_entry::__create_iter_cached_result(
+ detail::get_file_type(__data_),
+ detail::get_file_size(__data_),
+ detail::get_file_perm(__data_),
+ detail::get_write_time(__data_)));
return true;
}
diff --git a/libcxx/src/filesystem/file_descriptor.h b/libcxx/src/filesystem/file_descriptor.h
index 50178ff84e03f..36b0b853b4dc6 100644
--- a/libcxx/src/filesystem/file_descriptor.h
+++ b/libcxx/src/filesystem/file_descriptor.h
@@ -97,11 +97,18 @@ inline uintmax_t get_file_size(const WIN32_FIND_DATAW& data) {
return (static_cast<uint64_t>(data.nFileSizeHigh) << 32) + data.nFileSizeLow;
}
inline file_time_type get_write_time(const WIN32_FIND_DATAW& data) {
- ULARGE_INTEGER tmp;
+ using detail::fs_time;
const FILETIME& time = data.ftLastWriteTime;
- tmp.u.LowPart = time.dwLowDateTime;
- tmp.u.HighPart = time.dwHighDateTime;
- return file_time_type(file_time_type::duration(tmp.QuadPart));
+ auto ts = filetime_to_timespec(time);
+ if (!fs_time::is_representable(ts))
+ return file_time_type::min();
+ return fs_time::convert_from_timespec(ts);
+}
+inline perms get_file_perm(const WIN32_FIND_DATAW& data) {
+ unsigned st_mode = 0555; // Read-only
+ if (!(data.dwFileAttributes & FILE_ATTRIBUTE_READONLY))
+ st_mode |= 0222; // Write
+ return static_cast<perms>(st_mode) & perms::mask;
}
#endif // !_LIBCPP_WIN32API
>From e6c1dafa7c0d3b6a669586845fce4f94df7c0365 Mon Sep 17 00:00:00 2001
From: Eduard Satdarov <sath at yandex-team.ru>
Date: Thu, 30 May 2024 19:43:17 +0300
Subject: [PATCH 2/3] Test, formatting and fixes.
---
libcxx/include/__filesystem/directory_entry.h | 5 +-
.../cache_refresh_iter.pass.cpp | 101 ++++++++++++++++++
2 files changed, 104 insertions(+), 2 deletions(-)
create mode 100644 libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h
index 6d35d92ebf483..8f10b75f0cc82 100644
--- a/libcxx/include/__filesystem/directory_entry.h
+++ b/libcxx/include/__filesystem/directory_entry.h
@@ -243,7 +243,8 @@ class directory_entry {
return __data;
}
- _LIBCPP_HIDE_FROM_ABI static __cached_data __create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
+ _LIBCPP_HIDE_FROM_ABI static __cached_data
+ __create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) {
__cached_data __data;
__data.__type_ = __ft;
__data.__size_ = __size;
@@ -429,10 +430,10 @@ class directory_entry {
case _Empty:
case _IterNonSymlink:
case _IterSymlink:
+ case _IterCachedSymlink:
case _RefreshSymlinkUnresolved:
return filesystem::__last_write_time(__p_, __ec);
case _IterCachedNonSymlink:
- case _IterCachedSymlink:
case _RefreshSymlink:
case _RefreshNonSymlink: {
error_code __m_ec;
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
new file mode 100644
index 0000000000000..b404bf6b95d6f
--- /dev/null
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
@@ -0,0 +1,101 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: can-create-symlinks
+// UNSUPPORTED: c++03, c++11, c++14
+// UNSUPPORTED: no-filesystem
+// UNSUPPORTED: availability-filesystem-missing
+
+// <filesystem>
+
+// recursive_directory_iterator
+
+#include <filesystem>
+#include <type_traits>
+#include <set>
+#include <cassert>
+
+#include "test_macros.h"
+#include "filesystem_test_helper.h"
+namespace fs = std::filesystem;
+using namespace fs;
+
+#if defined(_WIN32)
+static void set_last_write_time_in_iteration(const fs::path& dir)
+{
+ // Windows can postpone updating last write time for file especially for
+ // directory because last write time of directory depends of its childs.
+ // See
+ // https://learn.microsoft.com/en-us/windows/win32/sysinfo/file-times
+ // To force updating file entries calls "last_write_time" with own value.
+ const recursive_directory_iterator endIt{};
+
+ std::error_code ec;
+ recursive_directory_iterator it(dir, ec);
+ assert(!ec);
+
+ file_time_type now_time = file_time_type::clock::now();
+ for ( ; it != endIt; ++it) {
+ const path entry = *it;
+ last_write_time(entry, now_time, ec);
+ assert(!ec);
+ }
+
+ assert(it == endIt);
+}
+#endif
+
+static void test_cache_and_refresh_in_iteration()
+{
+ static_test_env static_env;
+ const path testDir = static_env.Dir;
+#if defined(_WIN32)
+ set_last_write_time_in_iteration(testDir);
+#endif
+ const std::set<path> dir_contents(static_env.RecDirIterationList.begin(),
+ static_env.RecDirIterationList.end());
+ const recursive_directory_iterator endIt{};
+
+ std::error_code ec;
+ recursive_directory_iterator it(testDir, ec);
+ assert(!ec);
+
+ std::set<path> unseen_entries = dir_contents;
+ while (!unseen_entries.empty()) {
+ assert(it != endIt);
+ const directory_entry& entry = *it;
+
+ assert(unseen_entries.erase(entry.path()) == 1);
+
+ file_status symlink_status = entry.symlink_status();
+ file_status status = entry.status();
+ std::uintmax_t file_size = entry.is_regular_file() ? entry.file_size() : 0;
+ file_time_type last_write_time = entry.last_write_time();
+
+ directory_entry mutable_entry = *it;
+ mutable_entry.refresh();
+ file_status upd_symlink_status = mutable_entry.symlink_status();
+ file_status upd_status = mutable_entry.status();
+ std::uintmax_t upd_file_size = mutable_entry.is_regular_file() ? mutable_entry.file_size() : 0;
+ file_time_type upd_last_write_time = mutable_entry.last_write_time();
+ assert(upd_symlink_status == symlink_status);
+ assert(upd_status == status);
+ assert(upd_file_size == file_size);
+ assert(upd_last_write_time == last_write_time);
+
+ recursive_directory_iterator& it_ref = it.increment(ec);
+ assert(!ec);
+ assert(&it_ref == &it);
+ }
+}
+
+int main(int, char**) {
+ test_cache_and_refresh_in_iteration();
+
+ return 0;
+}
>From b8670a2a797988762d28a24c0904c0236f2fca5e Mon Sep 17 00:00:00 2001
From: Eduard Satdarov <sath at yandex-team.ru>
Date: Mon, 3 Jun 2024 12:54:33 +0300
Subject: [PATCH 3/3] [libcxx] Format code & fix test compilation for c++17.
---
.../cache_refresh_iter.pass.cpp | 126 +++++++++---------
1 file changed, 62 insertions(+), 64 deletions(-)
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
index b404bf6b95d6f..7ae4d792b019d 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/cache_refresh_iter.pass.cpp
@@ -26,76 +26,74 @@ namespace fs = std::filesystem;
using namespace fs;
#if defined(_WIN32)
-static void set_last_write_time_in_iteration(const fs::path& dir)
-{
- // Windows can postpone updating last write time for file especially for
- // directory because last write time of directory depends of its childs.
- // See
- // https://learn.microsoft.com/en-us/windows/win32/sysinfo/file-times
- // To force updating file entries calls "last_write_time" with own value.
- const recursive_directory_iterator endIt{};
-
- std::error_code ec;
- recursive_directory_iterator it(dir, ec);
+static void set_last_write_time_in_iteration(const fs::path& dir) {
+ // Windows can postpone updating last write time for file especially for
+ // directory because last write time of directory depends of its childs.
+ // See
+ // https://learn.microsoft.com/en-us/windows/win32/sysinfo/file-times
+ // To force updating file entries calls "last_write_time" with own value.
+ const recursive_directory_iterator endIt{};
+
+ std::error_code ec;
+ recursive_directory_iterator it(dir, ec);
+ assert(!ec);
+
+ file_time_type now_time = file_time_type::clock::now();
+ for (; it != endIt; ++it) {
+ const path entry = *it;
+ last_write_time(entry, now_time, ec);
assert(!ec);
+ }
- file_time_type now_time = file_time_type::clock::now();
- for ( ; it != endIt; ++it) {
- const path entry = *it;
- last_write_time(entry, now_time, ec);
- assert(!ec);
- }
-
- assert(it == endIt);
+ assert(it == endIt);
}
-#endif
-
-static void test_cache_and_refresh_in_iteration()
-{
- static_test_env static_env;
- const path testDir = static_env.Dir;
-#if defined(_WIN32)
- set_last_write_time_in_iteration(testDir);
-#endif
- const std::set<path> dir_contents(static_env.RecDirIterationList.begin(),
- static_env.RecDirIterationList.end());
- const recursive_directory_iterator endIt{};
-
- std::error_code ec;
- recursive_directory_iterator it(testDir, ec);
- assert(!ec);
+#endif
- std::set<path> unseen_entries = dir_contents;
- while (!unseen_entries.empty()) {
- assert(it != endIt);
- const directory_entry& entry = *it;
-
- assert(unseen_entries.erase(entry.path()) == 1);
-
- file_status symlink_status = entry.symlink_status();
- file_status status = entry.status();
- std::uintmax_t file_size = entry.is_regular_file() ? entry.file_size() : 0;
- file_time_type last_write_time = entry.last_write_time();
-
- directory_entry mutable_entry = *it;
- mutable_entry.refresh();
- file_status upd_symlink_status = mutable_entry.symlink_status();
- file_status upd_status = mutable_entry.status();
- std::uintmax_t upd_file_size = mutable_entry.is_regular_file() ? mutable_entry.file_size() : 0;
- file_time_type upd_last_write_time = mutable_entry.last_write_time();
- assert(upd_symlink_status == symlink_status);
- assert(upd_status == status);
- assert(upd_file_size == file_size);
- assert(upd_last_write_time == last_write_time);
-
- recursive_directory_iterator& it_ref = it.increment(ec);
- assert(!ec);
- assert(&it_ref == &it);
- }
+static void test_cache_and_refresh_in_iteration() {
+ static_test_env static_env;
+ const path testDir = static_env.Dir;
+#if defined(_WIN32)
+ set_last_write_time_in_iteration(testDir);
+#endif
+ const std::set<path> dir_contents(static_env.RecDirIterationList.begin(), static_env.RecDirIterationList.end());
+ const recursive_directory_iterator endIt{};
+
+ std::error_code ec;
+ recursive_directory_iterator it(testDir, ec);
+ assert(!ec);
+
+ std::set<path> unseen_entries = dir_contents;
+ while (!unseen_entries.empty()) {
+ assert(it != endIt);
+ const directory_entry& entry = *it;
+
+ assert(unseen_entries.erase(entry.path()) == 1);
+
+ file_status symlink_status = entry.symlink_status();
+ file_status status = entry.status();
+ std::uintmax_t file_size = entry.is_regular_file() ? entry.file_size() : 0;
+ file_time_type last_write_time = entry.last_write_time();
+
+ directory_entry mutable_entry = *it;
+ mutable_entry.refresh();
+ file_status upd_symlink_status = mutable_entry.symlink_status();
+ file_status upd_status = mutable_entry.status();
+ std::uintmax_t upd_file_size = mutable_entry.is_regular_file() ? mutable_entry.file_size() : 0;
+ file_time_type upd_last_write_time = mutable_entry.last_write_time();
+ assert(upd_symlink_status.type() == symlink_status.type() &&
+ upd_symlink_status.permissions() == symlink_status.permissions());
+ assert(upd_status.type() == status.type() && upd_status.permissions() == status.permissions());
+ assert(upd_file_size == file_size);
+ assert(upd_last_write_time == last_write_time);
+
+ recursive_directory_iterator& it_ref = it.increment(ec);
+ assert(!ec);
+ assert(&it_ref == &it);
+ }
}
int main(int, char**) {
- test_cache_and_refresh_in_iteration();
+ test_cache_and_refresh_in_iteration();
- return 0;
+ return 0;
}
More information about the libcxx-commits
mailing list