[libcxx-commits] [libcxx] [libc++] use copy_file_range for fs::copy (PR #109211)
Jannik Glückert via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Sep 18 15:36:34 PDT 2024
https://github.com/Jannik2099 created https://github.com/llvm/llvm-project/pull/109211
Hi,
this optimizes `std::filesystem::copy_file` to use the `copy_file_range` syscall (Linux and FreeBSD) when available. It allows for reflinks on filesystems such as btrfs, zfs and xfs, and server-side copy for network filesystems such as NFS.
This behaviour is in line with Boost.Filesystem and libstdc++ (where [I implemented this](https://github.com/gcc-mirror/gcc/commit/d87caacf8e2df563afda85f3a5b7b852e08b6b2c) some time ago). It's also the default behaviour of the standard file copy operations in .NET Core, Java, and (hopefully) soon python, and in the `cp` program from GNU coreutils.
I don't have the opportunity to test this on FreeBSD, but I think the FreeBSD folks will want a look at this. Sadly I don't know any by name, perhaps someone could CC the relevant person(s).
As for the `LIBCXX_USE_COPY_FILE_RANGE` cmake check, I was unsure where to put it, and this is probably not the desired location.
Lastly I am not sure if the largefile handling (see usage of `off_t`) is correct.
>From 4e7d8b9a43560957806e517adde35056a4b5b373 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jannik=20Gl=C3=BCckert?= <jannik.glueckert at gmail.com>
Date: Wed, 18 Sep 2024 21:45:08 +0200
Subject: [PATCH] [libc++] use copy_file_range for fs::copy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Jannik Glückert <jannik.glueckert at gmail.com>
---
libcxx/src/CMakeLists.txt | 6 +++
libcxx/src/filesystem/operations.cpp | 60 +++++++++++++++++++++++++++-
2 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index 48c5111a0acbf6..3f97d3e730a42c 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -173,6 +173,12 @@ if (APPLE AND LLVM_USE_SANITIZER)
endif()
endif()
+include(CheckCXXSymbolExists)
+check_cxx_symbol_exists("copy_file_range" "unistd.h" LIBCXX_USE_COPY_FILE_RANGE)
+if(LIBCXX_USE_COPY_FILE_RANGE)
+ list(APPEND LIBCXX_COMPILE_FLAGS "-D_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE")
+endif()
+
split_list(LIBCXX_COMPILE_FLAGS)
split_list(LIBCXX_LINK_FLAGS)
diff --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp
index d771f200973528..42d16c0546ae10 100644
--- a/libcxx/src/filesystem/operations.cpp
+++ b/libcxx/src/filesystem/operations.cpp
@@ -32,6 +32,7 @@
# include <dirent.h>
# include <sys/stat.h>
# include <sys/statvfs.h>
+# include <sys/types.h>
# include <unistd.h>
#endif
#include <fcntl.h> /* values for fchmodat */
@@ -178,8 +179,35 @@ void __copy(const path& from, const path& to, copy_options options, error_code*
namespace detail {
namespace {
+#if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE)
+bool copy_file_impl_copy_file_range(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
+ size_t count = read_fd.get_stat().st_size;
+ // a zero-length file is either empty, or not copyable by this syscall
+ // return early to avoid the syscall cost
+ if (count == 0) {
+ ec = {EINVAL, generic_category()};
+ return false;
+ }
+ do {
+ ssize_t res;
+ // do not modify the fd positions as copy_file_impl_sendfile may be called after a partial copy
+ off_t off_in = 0;
+ off_t off_out = 0;
+
+ if ((res = ::copy_file_range(read_fd.fd, &off_in, write_fd.fd, &off_out, count, 0)) == -1) {
+ ec = capture_errno();
+ return false;
+ }
+ count -= res;
+ } while (count > 0);
+
+ ec.clear();
+
+ return true;
+}
+#endif
#if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE)
-bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
+bool copy_file_impl_sendfile(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
size_t count = read_fd.get_stat().st_size;
do {
ssize_t res;
@@ -194,6 +222,36 @@ bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_cod
return true;
}
+#endif
+
+#if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE)
+bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
+ bool has_copied = false;
+# if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE)
+ has_copied = copy_file_impl_copy_file_range(read_fd, write_fd, ec);
+ if (has_copied) {
+ return true;
+ }
+ // EINVAL: src and dst are the same file (this is not cheaply
+ // detectable from userspace)
+ // EINVAL: copy_file_range is unsupported for this file type by the
+ // underlying filesystem
+ // ENOTSUP: undocumented, can arise with old kernels and NFS
+ // EOPNOTSUPP: filesystem does not implement copy_file_range
+ // ETXTBSY: src or dst is an active swapfile (nonsensical, but allowed
+ // with normal copying)
+ // EXDEV: src and dst are on different filesystems that do not support
+ // cross-fs copy_file_range
+ // ENOENT: undocumented, can arise with CIFS
+ // ENOSYS: unsupported by kernel or blocked by seccomp
+ if (ec.value() != EINVAL && ec.value() != ENOTSUP && ec.value() != EOPNOTSUPP && ec.value() != ETXTBSY &&
+ ec.value() != EXDEV && ec.value() != ENOENT && ec.value() != ENOSYS) {
+ return false;
+ }
+ ec.clear();
+# endif
+ return copy_file_impl_sendfile(read_fd, write_fd, ec);
+}
#elif defined(_LIBCPP_FILESYSTEM_USE_COPYFILE)
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
struct CopyFileState {
More information about the libcxx-commits
mailing list