[libcxx-commits] [libcxx] [libc++] use copy_file_range for fs::copy (PR #109211)
Jannik Glückert via libcxx-commits
libcxx-commits at lists.llvm.org
Fri Sep 20 08:15:04 PDT 2024
https://github.com/Jannik2099 updated https://github.com/llvm/llvm-project/pull/109211
>From 63d50033539505a46caf1db639eb21cdcf1c6bf0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jannik=20Gl=C3=BCckert?= <jannik.glueckert at gmail.com>
Date: Wed, 18 Sep 2024 21:45:08 +0200
Subject: [PATCH 1/2] [libc++] use copy_file_range for fs::copy_file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Opportunistically use copy_file_range (Linux, FreeBSD) where possible.
This allows for fast copies via reflinks,
and server-side copies for network filesystems.
Fall back to sendfile if copy_file_range is not supported.
Signed-off-by: Jannik Glückert <jannik.glueckert at gmail.com>
---
libcxx/src/CMakeLists.txt | 6 +++
libcxx/src/filesystem/operations.cpp | 70 +++++++++++++++++++++++++++-
2 files changed, 75 insertions(+), 1 deletion(-)
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index 48c5111a0acbf6..3f97d3e730a42c 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -173,6 +173,12 @@ if (APPLE AND LLVM_USE_SANITIZER)
endif()
endif()
+include(CheckCXXSymbolExists)
+check_cxx_symbol_exists("copy_file_range" "unistd.h" LIBCXX_USE_COPY_FILE_RANGE)
+if(LIBCXX_USE_COPY_FILE_RANGE)
+ list(APPEND LIBCXX_COMPILE_FLAGS "-D_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE")
+endif()
+
split_list(LIBCXX_COMPILE_FLAGS)
split_list(LIBCXX_LINK_FLAGS)
diff --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp
index d771f200973528..a3e650c93217d2 100644
--- a/libcxx/src/filesystem/operations.cpp
+++ b/libcxx/src/filesystem/operations.cpp
@@ -32,6 +32,7 @@
# include <dirent.h>
# include <sys/stat.h>
# include <sys/statvfs.h>
+# include <sys/types.h>
# include <unistd.h>
#endif
#include <fcntl.h> /* values for fchmodat */
@@ -178,8 +179,35 @@ void __copy(const path& from, const path& to, copy_options options, error_code*
namespace detail {
namespace {
+#if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE)
+bool copy_file_impl_copy_file_range(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
+ size_t count = read_fd.get_stat().st_size;
+ // a zero-length file is either empty, or not copyable by this syscall
+ // return early to avoid the syscall cost
+ if (count == 0) {
+ ec = {EINVAL, generic_category()};
+ return false;
+ }
+ // do not modify the fd positions as copy_file_impl_sendfile may be called after a partial copy
+ off_t off_in = 0;
+ off_t off_out = 0;
+ do {
+ ssize_t res;
+
+ if ((res = ::copy_file_range(read_fd.fd, &off_in, write_fd.fd, &off_out, count, 0)) == -1) {
+ ec = capture_errno();
+ return false;
+ }
+ count -= res;
+ } while (count > 0);
+
+ ec.clear();
+
+ return true;
+}
+#endif
#if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE)
-bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
+bool copy_file_impl_sendfile(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
size_t count = read_fd.get_stat().st_size;
do {
ssize_t res;
@@ -194,6 +222,46 @@ bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_cod
return true;
}
+#endif
+
+#if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE) || defined(_LIBCPP_FILESYSTEM_USE_SENDFILE)
+bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
+# if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE)
+ if (copy_file_impl_copy_file_range(read_fd, write_fd, ec)) {
+ return true;
+ }
+ // EINVAL: src and dst are the same file (this is not cheaply
+ // detectable from userspace)
+ // EINVAL: copy_file_range is unsupported for this file type by the
+ // underlying filesystem
+ // ENOTSUP: undocumented, can arise with old kernels and NFS
+ // EOPNOTSUPP: filesystem does not implement copy_file_range
+ // ETXTBSY: src or dst is an active swapfile (nonsensical, but allowed
+ // with normal copying)
+ // EXDEV: src and dst are on different filesystems that do not support
+ // cross-fs copy_file_range
+ // ENOENT: undocumented, can arise with CIFS
+ // ENOSYS: unsupported by kernel or blocked by seccomp
+ if (ec.value() != EINVAL && ec.value() != ENOTSUP && ec.value() != EOPNOTSUPP && ec.value() != ETXTBSY &&
+ ec.value() != EXDEV && ec.value() != ENOENT && ec.value() != ENOSYS) {
+ return false;
+ }
+ ec.clear();
+# endif
+
+# if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE)
+ if (copy_file_impl_sendfile(read_fd, write_fd, ec)) {
+ return true;
+ }
+ // EINVAL: unsupported file type
+ if (ec.value() != EINVAL) {
+ return false;
+ }
+ ec.clear();
+# endif
+ ec = {EINVAL, generic_category()};
+ return false;
+}
#elif defined(_LIBCPP_FILESYSTEM_USE_COPYFILE)
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
struct CopyFileState {
>From 3bc0f7c0520ba0c96a01f889aef94ee4d9247424 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jannik=20Gl=C3=BCckert?= <jannik.glueckert at gmail.com>
Date: Fri, 20 Sep 2024 17:06:08 +0200
Subject: [PATCH 2/2] [libc++] support special linux files in fs::copy_file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Virtual Linux filesystems such as /proc and /sys
may contain files that appear to have zero length
but do contain data.
These require a traditional userspace read + write loop.
Signed-off-by: Jannik Glückert <jannik.glueckert at gmail.com>
---
libcxx/src/filesystem/operations.cpp | 87 +++++++++++--------
.../fs.op.copy_file/copy_file_procfs.pass.cpp | 50 +++++++++++
2 files changed, 103 insertions(+), 34 deletions(-)
create mode 100644 libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_procfs.pass.cpp
diff --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp
index a3e650c93217d2..8fc3be874a89dc 100644
--- a/libcxx/src/filesystem/operations.cpp
+++ b/libcxx/src/filesystem/operations.cpp
@@ -45,10 +45,17 @@
# include <copyfile.h>
# define _LIBCPP_FILESYSTEM_USE_COPYFILE
#else
-# include <fstream>
# define _LIBCPP_FILESYSTEM_USE_FSTREAM
#endif
+// sendfile and copy_file_range need to fall back
+// to the fstream implementation for special files
+#if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE) || defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE) || \
+ defined(_LIBCPP_FILESYSTEM_USE_FSTREAM)
+# include <fstream>
+# define _LIBCPP_FILESYSTEM_NEED_FSTREAM
+#endif
+
#if defined(__ELF__) && defined(_LIBCPP_LINK_RT_LIB)
# pragma comment(lib, "rt")
#endif
@@ -179,6 +186,42 @@ void __copy(const path& from, const path& to, copy_options options, error_code*
namespace detail {
namespace {
+#if defined(_LIBCPP_FILESYSTEM_NEED_FSTREAM)
+bool copy_file_impl_fstream(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
+ ifstream in;
+ in.__open(read_fd.fd, ios::binary);
+ if (!in.is_open()) {
+ // This assumes that __open didn't reset the error code.
+ ec = capture_errno();
+ return false;
+ }
+ read_fd.fd = -1;
+ ofstream out;
+ out.__open(write_fd.fd, ios::binary);
+ if (!out.is_open()) {
+ ec = capture_errno();
+ return false;
+ }
+ write_fd.fd = -1;
+
+ if (in.good() && out.good()) {
+ using InIt = istreambuf_iterator<char>;
+ using OutIt = ostreambuf_iterator<char>;
+ InIt bin(in);
+ InIt ein;
+ OutIt bout(out);
+ copy(bin, ein, bout);
+ }
+ if (out.fail() || in.fail()) {
+ ec = make_error_code(errc::io_error);
+ return false;
+ }
+
+ ec.clear();
+ return true;
+}
+#endif
+
#if defined(_LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE)
bool copy_file_impl_copy_file_range(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
size_t count = read_fd.get_stat().st_size;
@@ -209,6 +252,12 @@ bool copy_file_impl_copy_file_range(FileDescriptor& read_fd, FileDescriptor& wri
#if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE)
bool copy_file_impl_sendfile(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
size_t count = read_fd.get_stat().st_size;
+ // a zero-length file is either empty, or not copyable by this syscall
+ // return early to avoid the syscall cost
+ if (count == 0) {
+ ec = {EINVAL, generic_category()};
+ return false;
+ }
do {
ssize_t res;
if ((res = ::sendfile(write_fd.fd, read_fd.fd, nullptr, count)) == -1) {
@@ -259,8 +308,8 @@ bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_cod
}
ec.clear();
# endif
- ec = {EINVAL, generic_category()};
- return false;
+
+ return copy_file_impl_fstream(read_fd, write_fd, ec);
}
#elif defined(_LIBCPP_FILESYSTEM_USE_COPYFILE)
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
@@ -285,37 +334,7 @@ bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_cod
}
#elif defined(_LIBCPP_FILESYSTEM_USE_FSTREAM)
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
- ifstream in;
- in.__open(read_fd.fd, ios::binary);
- if (!in.is_open()) {
- // This assumes that __open didn't reset the error code.
- ec = capture_errno();
- return false;
- }
- read_fd.fd = -1;
- ofstream out;
- out.__open(write_fd.fd, ios::binary);
- if (!out.is_open()) {
- ec = capture_errno();
- return false;
- }
- write_fd.fd = -1;
-
- if (in.good() && out.good()) {
- using InIt = istreambuf_iterator<char>;
- using OutIt = ostreambuf_iterator<char>;
- InIt bin(in);
- InIt ein;
- OutIt bout(out);
- copy(bin, ein, bout);
- }
- if (out.fail() || in.fail()) {
- ec = make_error_code(errc::io_error);
- return false;
- }
-
- ec.clear();
- return true;
+ return copy_file_impl_fstream(read_fd, write_fd, ec);
}
#else
# error "Unknown implementation for copy_file_impl"
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_procfs.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_procfs.pass.cpp
new file mode 100644
index 00000000000000..e356b695e75941
--- /dev/null
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_procfs.pass.cpp
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+// REQUIRES: linux
+// UNSUPPORTED: no-filesystem
+// UNSUPPORTED: availability-filesystem-missing
+
+// <filesystem>
+
+// bool copy_file(const path& from, const path& to);
+// bool copy_file(const path& from, const path& to, error_code& ec) noexcept;
+// bool copy_file(const path& from, const path& to, copy_options options);
+// bool copy_file(const path& from, const path& to, copy_options options,
+// error_code& ec) noexcept;
+
+#include <filesystem>
+#include <cassert>
+
+#include "test_macros.h"
+#include "filesystem_test_helper.h"
+
+using namespace std::filesystem;
+
+// Linux has various virtual filesystems such as /proc and /sys
+// where files may have no length (st_size == 0), but still contain data.
+// This is because the to-be-read data is usually generated ad-hoc by the reading syscall.
+// These files cannot be copied with kernel-side copies like copy_file_range or sendfile,
+// and must instead be copied via a traditional userspace read + write loop.
+int main(int, char**) {
+ const path procfile{"/proc/self/comm"};
+ assert(file_size(procfile) == 0);
+
+ scoped_test_env env;
+ std::error_code ec = GetTestEC();
+
+ const path dest = env.make_env_path("dest");
+
+ assert(copy_file(procfile, dest, ec));
+ assert(!ec);
+
+ // a tad fragile if lit ever changes the output name,
+ // but the easiest way to assert that the correct data was copied.
+ assert(file_size(dest) == sizeof("t.tmp.exe"));
+}
More information about the libcxx-commits
mailing list