[llvm] 0d5b642 - Support: Reduce stats in fs::copy_file on Darwin

Duncan P. N. Exon Smith via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 29 16:48:43 PDT 2021


Author: Duncan P. N. Exon Smith
Date: 2021-10-29T16:48:35-07:00
New Revision: 0d5b6423bac61efd8aaedaa7b65b462d2f08c661

URL: https://github.com/llvm/llvm-project/commit/0d5b6423bac61efd8aaedaa7b65b462d2f08c661
DIFF: https://github.com/llvm/llvm-project/commit/0d5b6423bac61efd8aaedaa7b65b462d2f08c661.diff

LOG: Support: Reduce stats in fs::copy_file on Darwin

fs::copy_file() on Darwin has a nice optimization to clone the file when
possible. Change the implementation to use clonefile() directly, instead
of the higher-level copyfile().  The latter does the wrong thing for
symlinks, which requires calling `stat` first...

With that out of the way, optimistically call clonefile() all the time,
and then for any error that's recoverable try again with copyfile()
(without the COPYFILE_CLONE flag, as before).

Differential Revision: https://reviews.llvm.org/D112250

Added: 
    

Modified: 
    llvm/lib/Support/Unix/Path.inc
    llvm/unittests/Support/Path.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index 33c62594588bd..19d89db556273 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -39,6 +39,9 @@
 #include <mach-o/dyld.h>
 #include <sys/attr.h>
 #include <copyfile.h>
+#if __has_include(<sys/clonefile.h>)
+#include <sys/clonefile.h>
+#endif
 #elif defined(__FreeBSD__)
 #include <osreldate.h>
 #if __FreeBSD_version >= 1300057
@@ -1457,22 +1460,37 @@ namespace fs {
 /// file descriptor variant of this function still uses the default
 /// implementation.
 std::error_code copy_file(const Twine &From, const Twine &To) {
-  uint32_t Flag = COPYFILE_DATA;
-#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE)
+  std::string FromS = From.str();
+  std::string ToS = To.str();
+#if __has_builtin(__builtin_available)
   if (__builtin_available(macos 10.12, *)) {
-    bool IsSymlink;
-    if (std::error_code Error = is_symlink_file(From, IsSymlink))
-      return Error;
-    // COPYFILE_CLONE clones the symlink instead of following it
-    // and returns EEXISTS if the target file already exists.
-    if (!IsSymlink && !exists(To))
-      Flag = COPYFILE_CLONE;
+    // Optimistically try to use clonefile() and handle errors, rather than
+    // calling stat() to see if it'll work.
+    //
+    // Note: It's okay if From is a symlink. In contrast to the behaviour of
+    // copyfile() with COPYFILE_CLONE, clonefile() clones targets (not the
+    // symlink itself) unless the flag CLONE_NOFOLLOW is passed.
+    if (!clonefile(FromS.c_str(), ToS.c_str(), 0))
+      return std::error_code();
+
+    auto Errno = errno;
+    switch (Errno) {
+      case EEXIST:  // To already exists.
+      case ENOTSUP: // Device does not support cloning.
+      case EXDEV:   // From and To are on 
diff erent devices.
+        break;
+      default:
+        // Anything else will also break copyfile().
+        return std::error_code(Errno, std::generic_category());
+    }
+
+    // TODO: For EEXIST, profile calling fs::generateUniqueName() and
+    // clonefile() in a retry loop (then rename() on success) before falling
+    // back to copyfile(). Depending on the size of the file this could be
+    // cheaper.
   }
 #endif
-  int Status =
-      copyfile(From.str().c_str(), To.str().c_str(), /* State */ NULL, Flag);
-
-  if (Status == 0)
+  if (!copyfile(FromS.c_str(), ToS.c_str(), /*State=*/NULL, COPYFILE_DATA))
     return std::error_code();
   return std::error_code(errno, std::generic_category());
 }

diff  --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp
index 59d3f21e64cdd..52e447fc2b324 100644
--- a/llvm/unittests/Support/Path.cpp
+++ b/llvm/unittests/Support/Path.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Testing/Support/Error.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -2325,4 +2326,61 @@ TEST_F(FileSystemTest, FileLocker) {
 }
 #endif
 
+TEST_F(FileSystemTest, CopyFile) {
+  unittest::TempDir RootTestDirectory("CopyFileTest", /*Unique=*/true);
+
+  SmallVector<std::string> Data;
+  SmallVector<SmallString<128>> Sources;
+  for (int I = 0, E = 3; I != E; ++I) {
+    Data.push_back(Twine(I).str());
+    Sources.emplace_back(RootTestDirectory.path());
+    path::append(Sources.back(), "source" + Data.back() + ".txt");
+    createFileWithData(Sources.back(), /*ShouldExistBefore=*/false,
+                       fs::CD_CreateNew, Data.back());
+  }
+
+  // Copy the first file to a non-existing file.
+  SmallString<128> Destination(RootTestDirectory.path());
+  path::append(Destination, "destination");
+  ASSERT_FALSE(fs::exists(Destination));
+  fs::copy_file(Sources[0], Destination);
+  verifyFileContents(Destination, Data[0]);
+
+  // Copy the second file to an existing file.
+  fs::copy_file(Sources[1], Destination);
+  verifyFileContents(Destination, Data[1]);
+
+  // Note: The remaining logic is targeted at a potential failure case related
+  // to file cloning and symlinks on Darwin. On Windows, fs::create_link() does
+  // not return success here so the test is skipped.
+#if !defined(_WIN32)
+  // Set up a symlink to the third file.
+  SmallString<128> Symlink(RootTestDirectory.path());
+  path::append(Symlink, "symlink");
+  ASSERT_NO_ERROR(fs::create_link(path::filename(Sources[2]), Symlink));
+  verifyFileContents(Symlink, Data[2]);
+
+  // fs::getUniqueID() should follow symlinks. Otherwise, this isn't good test
+  // coverage.
+  fs::UniqueID SymlinkID;
+  fs::UniqueID Data2ID;
+  ASSERT_NO_ERROR(fs::getUniqueID(Symlink, SymlinkID));
+  ASSERT_NO_ERROR(fs::getUniqueID(Sources[2], Data2ID));
+  ASSERT_EQ(SymlinkID, Data2ID);
+
+  // Copy the third file through the symlink.
+  fs::copy_file(Symlink, Destination);
+  verifyFileContents(Destination, Data[2]);
+
+  // Confirm the destination is not a link to the original file, and not a
+  // symlink.
+  bool IsDestinationSymlink;
+  ASSERT_NO_ERROR(fs::is_symlink_file(Destination, IsDestinationSymlink));
+  ASSERT_FALSE(IsDestinationSymlink);
+  fs::UniqueID DestinationID;
+  ASSERT_NO_ERROR(fs::getUniqueID(Destination, DestinationID));
+  ASSERT_NE(SymlinkID, DestinationID);
+#endif
+}
+
 } // anonymous namespace


        


More information about the llvm-commits mailing list