[llvm] [llvm][support] Refactor symlink handling and add readlink (PR #184256)

Michael Spencer via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 2 18:10:47 PST 2026


https://github.com/Bigcheese updated https://github.com/llvm/llvm-project/pull/184256

>From 32030396ee59ef29568da141d840f736572eab96 Mon Sep 17 00:00:00 2001
From: Michael Spencer <bigcheesegs at gmail.com>
Date: Mon, 2 Mar 2026 14:43:42 -0800
Subject: [PATCH] [llvm][support] Refactor symlink handling and add readlink

This adds a portable `readlink` function, and adds `create_symlink` to
enable testing this on Windows. `create_link` previously created a
hard link on Windows, but it now tries to create a symlink first.

The Windows implementation is based on posix_compat.h from libc++.

Assisted-by: claude-opus-4.6
---
 llvm/docs/ReleaseNotes.md              |   9 ++
 llvm/include/llvm/Support/FileSystem.h |  29 ++++--
 llvm/lib/Support/Unix/Path.inc         |  40 +++++++-
 llvm/lib/Support/Windows/Path.inc      | 130 ++++++++++++++++++++++++-
 llvm/unittests/Support/Path.cpp        |  89 ++++++++++++++---
 5 files changed, 269 insertions(+), 28 deletions(-)

diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 91b150c9fe982..3b10694007856 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -83,6 +83,15 @@ Changes to LLVM infrastructure
 * Removed `bugpoint`. Usage has been replaced by `llvm-reduce` and
   `llvm/utils/reduce_pipeline.py`.
 
+* The ``llvm::sys::fs`` link creation API has been refactored:
+
+  * ``create_link`` now tries to create a symbolic link first, falling back to a
+    hard link if that fails (previously it created a symlink on Unix and a hard
+    link on Windows).
+  * Added ``create_symlink``, which always creates a symbolic link. On Windows
+    this may fail if symlink permissions are not available.
+  * Added ``readlink``, which reads the target of a symbolic link.
+
 Changes to building LLVM
 ------------------------
 
diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h
index 547d732dc3053..f6df9f9e17251 100644
--- a/llvm/include/llvm/Support/FileSystem.h
+++ b/llvm/include/llvm/Support/FileSystem.h
@@ -299,15 +299,23 @@ LLVM_ABI std::error_code create_directory(const Twine &path,
                                           bool IgnoreExisting = true,
                                           perms Perms = owner_all | group_all);
 
+/// Create a symbolic link from \a from to \a to.
+///
+/// This will fail on Windows if run without create symbolic link permissions.
+///
+/// @param to The path to the symlink target.
+/// @param from The path of the symlink to create.
+/// @returns errc::success if the link was created, otherwise a platform
+/// specific error_code.
+LLVM_ABI std::error_code create_symlink(const Twine &to, const Twine &from);
+
 /// Create a link from \a from to \a to.
 ///
-/// The link may be a soft or a hard link, depending on the platform. The caller
-/// may not assume which one. Currently on windows it creates a hard link since
-/// soft links require extra privileges. On unix, it creates a soft link since
-/// hard links don't work on SMB file systems.
+/// Tries to create a symbolic link first, and falls back to a hard link if
+/// that fails. The caller may not assume which type of link is created.
 ///
-/// @param to The path to hard link to.
-/// @param from The path to hard link from. This is created.
+/// @param to The path to link to.
+/// @param from The path to link from. This is created.
 /// @returns errc::success if the link was created, otherwise a platform
 /// specific error_code.
 LLVM_ABI std::error_code create_link(const Twine &to, const Twine &from);
@@ -331,6 +339,15 @@ LLVM_ABI std::error_code real_path(const Twine &path,
                                    SmallVectorImpl<char> &output,
                                    bool expand_tilde = false);
 
+/// Read the target of a symbolic link.
+///
+/// @param path The path of the symlink.
+/// @param output The location to store the symlink target.
+/// @returns errc::success if the symlink target has been stored in output,
+///          otherwise a platform-specific error_code.
+LLVM_ABI std::error_code readlink(const Twine &path,
+                                  SmallVectorImpl<char> &output);
+
 /// Expands ~ expressions to the user's home directory. On Unix ~user
 /// directories are resolved as well.
 ///
diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index fc71ca46d823a..9b99cb820c054 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -241,7 +241,7 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
   const char *curproc = "/proc/curproc/file";
   char exe_path[PATH_MAX];
   if (sys::fs::exists(curproc)) {
-    ssize_t len = readlink(curproc, exe_path, sizeof(exe_path));
+    ssize_t len = ::readlink(curproc, exe_path, sizeof(exe_path));
     if (len > 0) {
       // Null terminate the string for realpath. readlink never null
       // terminates its output.
@@ -259,7 +259,7 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
   const char *aPath = "/proc/self/exe";
   if (sys::fs::exists(aPath)) {
     // /proc is not always mounted under Linux (chroot for example).
-    ssize_t len = readlink(aPath, exe_path, sizeof(exe_path));
+    ssize_t len = ::readlink(aPath, exe_path, sizeof(exe_path));
     if (len < 0)
       return "";
 
@@ -430,9 +430,7 @@ std::error_code create_directory(const Twine &path, bool IgnoreExisting,
   return std::error_code();
 }
 
-// Note that we are using symbolic link because hard links are not supported by
-// all filesystems (SMB doesn't).
-std::error_code create_link(const Twine &to, const Twine &from) {
+std::error_code create_symlink(const Twine &to, const Twine &from) {
   // Get arguments.
   SmallString<128> from_storage;
   SmallString<128> to_storage;
@@ -445,6 +443,13 @@ std::error_code create_link(const Twine &to, const Twine &from) {
   return std::error_code();
 }
 
+std::error_code create_link(const Twine &to, const Twine &from) {
+  std::error_code EC = create_symlink(to, from);
+  if (EC)
+    EC = create_hard_link(to, from);
+  return EC;
+}
+
 std::error_code create_hard_link(const Twine &to, const Twine &from) {
   // Get arguments.
   SmallString<128> from_storage;
@@ -1422,6 +1427,31 @@ std::error_code real_path(const Twine &path, SmallVectorImpl<char> &dest,
   return std::error_code();
 }
 
+std::error_code readlink(const Twine &path, SmallVectorImpl<char> &dest) {
+  dest.clear();
+
+  SmallString<128> Storage;
+  StringRef P = path.toNullTerminatedStringRef(Storage);
+
+  // Call ::readlink in a loop, growing the buffer until the result fits. We
+  // can't use lstat to get the size ahead of time because it's racy (the
+  // symlink can be replaced between lstat and readlink), and some filesystems
+  // (e.g. /proc on Linux) report st_size == 0 for symlinks.
+  size_t BufSize = 128;
+  for (;;) {
+    dest.resize_for_overwrite(BufSize);
+    ssize_t Len = ::readlink(P.begin(), dest.data(), dest.size());
+    if (Len < 0)
+      return errnoAsErrorCode();
+    if (static_cast<size_t>(Len) < BufSize) {
+      dest.truncate(Len);
+      return std::error_code();
+    }
+    // Result may have been truncated. Grow and retry.
+    BufSize *= 2;
+  }
+}
+
 std::error_code changeFileOwnership(int FD, uint32_t Owner, uint32_t Group) {
   auto FChown = [&]() { return ::fchown(FD, Owner, Group); };
   // Retry if fchown call fails due to interruption.
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index 9ab31c4f84f2f..e47ed914046c9 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -338,8 +338,46 @@ std::error_code create_directory(const Twine &path, bool IgnoreExisting,
   return std::error_code();
 }
 
-// We can't use symbolic links for windows.
+std::error_code create_symlink(const Twine &to, const Twine &from) {
+  SmallVector<wchar_t, 128> wide_from;
+  SmallVector<wchar_t, 128> wide_to;
+  if (std::error_code ec = widenPath(from, wide_from))
+    return ec;
+  if (std::error_code ec = widenPath(to, wide_to))
+    return ec;
+
+  // Windows requires SYMBOLIC_LINK_FLAG_DIRECTORY for directory symlinks.
+  DWORD Flags = 0;
+  DWORD Attr = ::GetFileAttributesW(wide_to.begin());
+  if (Attr != INVALID_FILE_ATTRIBUTES && (Attr & FILE_ATTRIBUTE_DIRECTORY))
+    Flags |= SYMBOLIC_LINK_FLAG_DIRECTORY;
+
+  // Try with SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE first, which works
+  // when Developer Mode is enabled on Windows 10+.
+  if (::CreateSymbolicLinkW(wide_from.begin(), wide_to.begin(),
+                            Flags |
+                                SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE))
+    return std::error_code();
+
+  // If the flag is not recognized (older Windows), retry without it.
+  DWORD Err = ::GetLastError();
+  if (Err == ERROR_INVALID_PARAMETER) {
+    if (::CreateSymbolicLinkW(wide_from.begin(), wide_to.begin(), Flags))
+      return std::error_code();
+    Err = ::GetLastError();
+  }
+
+  return mapWindowsError(Err);
+}
+
 std::error_code create_link(const Twine &to, const Twine &from) {
+  std::error_code EC = create_symlink(to, from);
+  if (EC)
+    EC = create_hard_link(to, from);
+  return EC;
+}
+
+std::error_code create_hard_link(const Twine &to, const Twine &from) {
   // Convert to utf-16.
   SmallVector<wchar_t, 128> wide_from;
   SmallVector<wchar_t, 128> wide_to;
@@ -354,10 +392,6 @@ std::error_code create_link(const Twine &to, const Twine &from) {
   return std::error_code();
 }
 
-std::error_code create_hard_link(const Twine &to, const Twine &from) {
-  return create_link(to, from);
-}
-
 std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
   SmallVector<wchar_t, 128> path_utf16;
 
@@ -1649,6 +1683,92 @@ std::error_code real_path(const Twine &path, SmallVectorImpl<char> &dest,
   return std::error_code();
 }
 
+// This struct is normally only available in the Windows Driver Kit (WDK)
+// headers, not in the standard Windows SDK.
+struct LLVM_REPARSE_DATA_BUFFER {
+  unsigned long ReparseTag;
+  unsigned short ReparseDataLength;
+  unsigned short Reserved;
+  union {
+    struct {
+      unsigned short SubstituteNameOffset;
+      unsigned short SubstituteNameLength;
+      unsigned short PrintNameOffset;
+      unsigned short PrintNameLength;
+      unsigned long Flags;
+      wchar_t PathBuffer[1];
+    } SymbolicLinkReparseBuffer;
+    struct {
+      unsigned short SubstituteNameOffset;
+      unsigned short SubstituteNameLength;
+      unsigned short PrintNameOffset;
+      unsigned short PrintNameLength;
+      wchar_t PathBuffer[1];
+    } MountPointReparseBuffer;
+    struct {
+      unsigned char DataBuffer[1];
+    } GenericReparseBuffer;
+  };
+};
+
+std::error_code readlink(const Twine &path, SmallVectorImpl<char> &dest) {
+  dest.clear();
+
+  SmallVector<wchar_t, 128> PathUTF16;
+  if (std::error_code EC = widenPath(path, PathUTF16))
+    return EC;
+
+  // Open the link itself (FILE_FLAG_OPEN_REPARSE_POINT), not its target.
+  ScopedFileHandle H(::CreateFileW(
+      c_str(PathUTF16), FILE_READ_ATTRIBUTES,
+      FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL,
+      OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT,
+      NULL));
+  if (!H)
+    return mapWindowsError(::GetLastError());
+
+  // Read the raw reparse point data into a maximally sized buffer.
+  union {
+    LLVM_REPARSE_DATA_BUFFER RDB;
+    char Buffer[MAXIMUM_REPARSE_DATA_BUFFER_SIZE];
+  };
+  DWORD BytesReturned;
+  if (!::DeviceIoControl(H, FSCTL_GET_REPARSE_POINT, NULL, 0, &RDB,
+                         sizeof(Buffer), &BytesReturned, NULL))
+    return mapWindowsError(::GetLastError());
+
+  if (RDB.ReparseTag != IO_REPARSE_TAG_SYMLINK)
+    return make_error_code(errc::invalid_argument);
+
+  const auto &SLB = RDB.SymbolicLinkReparseBuffer;
+  size_t PathBufOffset =
+      offsetof(LLVM_REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer);
+
+  // Prefer PrintName (user-friendly, e.g. "C:\foo") over SubstituteName
+  // (NT-internal, e.g. "\??\C:\foo").
+  USHORT NameOffset, NameLength;
+  if (SLB.PrintNameLength != 0) {
+    NameOffset = SLB.PrintNameOffset;
+    NameLength = SLB.PrintNameLength;
+  } else {
+    NameOffset = SLB.SubstituteNameOffset;
+    NameLength = SLB.SubstituteNameLength;
+  }
+
+  // Validate that the returned data is large enough to contain the name.
+  if (PathBufOffset + NameOffset + NameLength > BytesReturned)
+    return make_error_code(errc::invalid_argument);
+
+  const wchar_t *Target = SLB.PathBuffer + NameOffset / sizeof(wchar_t);
+  USHORT TargetLen = NameLength / sizeof(wchar_t);
+
+  if (std::error_code EC = UTF16ToUTF8(Target, TargetLen, dest))
+    return EC;
+
+  llvm::sys::path::make_preferred(dest);
+  return std::error_code();
+}
+
 } // end namespace fs
 
 namespace path {
diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp
index b27ed6f950b10..8fae2fd182d07 100644
--- a/llvm/unittests/Support/Path.cpp
+++ b/llvm/unittests/Support/Path.cpp
@@ -784,6 +784,71 @@ TEST_F(FileSystemTest, RealPath) {
   ASSERT_NO_ERROR(fs::remove_directories(Twine(TestDirectory) + "/test1"));
 }
 
+TEST_F(FileSystemTest, Readlink) {
+  int FD;
+  SmallString<128> Target(TestDirectory);
+  path::append(Target, "target");
+  ASSERT_NO_ERROR(fs::openFileForWrite(Target, FD, fs::CD_CreateNew));
+  ::close(FD);
+
+  SmallString<128> Link(TestDirectory);
+  path::append(Link, "link");
+  std::error_code EC = fs::create_symlink(Target, Link);
+  if (EC) {
+    ASSERT_NO_ERROR(fs::remove(Target));
+    GTEST_SKIP() << "Symlinks not supported: " << EC.message();
+  }
+
+  SmallString<128> Result;
+  ASSERT_NO_ERROR(fs::readlink(Link, Result));
+  EXPECT_EQ(Target, Result);
+
+  ASSERT_NO_ERROR(fs::remove(Link));
+  ASSERT_NO_ERROR(fs::remove(Target));
+}
+
+TEST_F(FileSystemTest, ReadlinkRelative) {
+  int FD;
+  SmallString<128> Target(TestDirectory);
+  path::append(Target, "target");
+  ASSERT_NO_ERROR(fs::openFileForWrite(Target, FD, fs::CD_CreateNew));
+  ::close(FD);
+
+  SmallString<128> Link(TestDirectory);
+  path::append(Link, "link");
+  std::error_code EC = fs::create_symlink("target", Link);
+  if (EC) {
+    ASSERT_NO_ERROR(fs::remove(Target));
+    GTEST_SKIP() << "Symlinks not supported: " << EC.message();
+  }
+
+  SmallString<128> Result;
+  ASSERT_NO_ERROR(fs::readlink(Link, Result));
+  EXPECT_EQ("target", Result);
+
+  ASSERT_NO_ERROR(fs::remove(Link));
+  ASSERT_NO_ERROR(fs::remove(Target));
+}
+
+TEST_F(FileSystemTest, ReadlinkNonSymlink) {
+  int FD;
+  SmallString<128> Regular(TestDirectory);
+  path::append(Regular, "regular");
+  ASSERT_NO_ERROR(fs::openFileForWrite(Regular, FD, fs::CD_CreateNew));
+  ::close(FD);
+
+  SmallString<128> Result;
+  EXPECT_EQ(fs::readlink(Regular, Result), errc::invalid_argument);
+
+  ASSERT_NO_ERROR(fs::remove(Regular));
+}
+
+TEST_F(FileSystemTest, ReadlinkNonExistent) {
+  SmallString<128> Result;
+  EXPECT_EQ(fs::readlink(TestDirectory + "/does_not_exist", Result),
+            errc::no_such_file_or_directory);
+}
+
 TEST_F(FileSystemTest, ExpandTilde) {
   SmallString<64> Expected;
   SmallString<64> Actual;
@@ -1179,21 +1244,21 @@ TEST_F(FileSystemTest, BrokenSymlinkDirectoryIteration) {
   // Create a known hierarchy to recurse over.
   ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory) + "/symlink"));
   ASSERT_NO_ERROR(
-      fs::create_link("no_such_file", Twine(TestDirectory) + "/symlink/a"));
+      fs::create_symlink("no_such_file", Twine(TestDirectory) + "/symlink/a"));
   ASSERT_NO_ERROR(
       fs::create_directories(Twine(TestDirectory) + "/symlink/b/bb"));
+  ASSERT_NO_ERROR(fs::create_symlink("no_such_file",
+                                     Twine(TestDirectory) + "/symlink/b/ba"));
+  ASSERT_NO_ERROR(fs::create_symlink("no_such_file",
+                                     Twine(TestDirectory) + "/symlink/b/bc"));
   ASSERT_NO_ERROR(
-      fs::create_link("no_such_file", Twine(TestDirectory) + "/symlink/b/ba"));
-  ASSERT_NO_ERROR(
-      fs::create_link("no_such_file", Twine(TestDirectory) + "/symlink/b/bc"));
-  ASSERT_NO_ERROR(
-      fs::create_link("no_such_file", Twine(TestDirectory) + "/symlink/c"));
+      fs::create_symlink("no_such_file", Twine(TestDirectory) + "/symlink/c"));
   ASSERT_NO_ERROR(
       fs::create_directories(Twine(TestDirectory) + "/symlink/d/dd/ddd"));
-  ASSERT_NO_ERROR(fs::create_link(Twine(TestDirectory) + "/symlink/d/dd",
-                                  Twine(TestDirectory) + "/symlink/d/da"));
+  ASSERT_NO_ERROR(fs::create_symlink(Twine(TestDirectory) + "/symlink/d/dd",
+                                     Twine(TestDirectory) + "/symlink/d/da"));
   ASSERT_NO_ERROR(
-      fs::create_link("no_such_file", Twine(TestDirectory) + "/symlink/e"));
+      fs::create_symlink("no_such_file", Twine(TestDirectory) + "/symlink/e"));
 
   typedef std::vector<std::string> v_t;
   v_t VisitedNonBrokenSymlinks;
@@ -2731,13 +2796,13 @@ TEST_F(FileSystemTest, CopyFile) {
   verifyFileContents(Destination, Data[1]);
 
   // Note: The remaining logic is targeted at a potential failure case related
-  // to file cloning and symlinks on Darwin. On Windows, fs::create_link() does
-  // not return success here so the test is skipped.
+  // to file cloning and symlinks on Darwin. On Windows, fs::create_symlink()
+  // may not return success here so the test is skipped.
 #if !defined(_WIN32)
   // Set up a symlink to the third file.
   SmallString<128> Symlink(RootTestDirectory.path());
   path::append(Symlink, "symlink");
-  ASSERT_NO_ERROR(fs::create_link(path::filename(Sources[2]), Symlink));
+  ASSERT_NO_ERROR(fs::create_symlink(path::filename(Sources[2]), Symlink));
   verifyFileContents(Symlink, Data[2]);
 
   // fs::getUniqueID() should follow symlinks. Otherwise, this isn't good test



More information about the llvm-commits mailing list