[llvm] e6f6c55 - [Support] Improve Windows widenPath and add support for long UNC paths

Andrew Ng via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 19 06:01:58 PDT 2020


Author: Andrew Ng
Date: 2020-03-19T13:00:21Z
New Revision: e6f6c551213a65b66e63f3c5d84b85d2b99097f3

URL: https://github.com/llvm/llvm-project/commit/e6f6c551213a65b66e63f3c5d84b85d2b99097f3
DIFF: https://github.com/llvm/llvm-project/commit/e6f6c551213a65b66e63f3c5d84b85d2b99097f3.diff

LOG: [Support] Improve Windows widenPath and add support for long UNC paths

Check the path length limit against the length of the UTF-16 version of
the input rather than the UTF-8 equivalent, as the UTF-16 length may be
shorter. Move widenPath from the llvm::sys::path namespace in Path.h to
the llvm::sys::windows namespace in WindowsSupport.h. Only use the
reduced path length limit for create directory. Canonicalize using
sys::path::remove_dots().

Differential Revision: https://reviews.llvm.org/D75372

Added: 
    

Modified: 
    llvm/include/llvm/Support/Path.h
    llvm/include/llvm/Support/Windows/WindowsSupport.h
    llvm/lib/Support/Windows/Path.inc
    llvm/lib/Support/Windows/Program.inc
    llvm/unittests/Support/Path.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h
index 97955f882d51..f0b2810cd7a9 100644
--- a/llvm/include/llvm/Support/Path.h
+++ b/llvm/include/llvm/Support/Path.h
@@ -468,10 +468,6 @@ StringRef remove_leading_dotslash(StringRef path, Style style = Style::native);
 bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false,
                  Style style = Style::native);
 
-#if defined(_WIN32)
-std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16);
-#endif
-
 } // end namespace path
 } // end namespace sys
 } // end namespace llvm

diff  --git a/llvm/include/llvm/Support/Windows/WindowsSupport.h b/llvm/include/llvm/Support/Windows/WindowsSupport.h
index bb7e79b86018..bd5a90c2c3f0 100644
--- a/llvm/include/llvm/Support/Windows/WindowsSupport.h
+++ b/llvm/include/llvm/Support/Windows/WindowsSupport.h
@@ -236,6 +236,12 @@ namespace windows {
 // UTF-8 regardless of the current code page setting.
 std::error_code GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
                                         BumpPtrAllocator &Alloc);
+
+/// Convert UTF-8 path to a suitable UTF-16 path for use with the Win32 Unicode
+/// File API.
+std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
+                          size_t MaxPathLen = MAX_PATH);
+
 } // end namespace windows
 } // end namespace sys
 } // end namespace llvm.

diff  --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index d634c123fbdc..ee199aa1d4b1 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -47,7 +47,7 @@ using namespace llvm;
 using llvm::sys::windows::UTF8ToUTF16;
 using llvm::sys::windows::CurCPToUTF16;
 using llvm::sys::windows::UTF16ToUTF8;
-using llvm::sys::path::widenPath;
+using llvm::sys::windows::widenPath;
 
 static bool is_separator(const wchar_t value) {
   switch (value) {
@@ -61,64 +61,70 @@ static bool is_separator(const wchar_t value) {
 
 namespace llvm {
 namespace sys  {
-namespace path {
+namespace windows {
 
-// Convert a UTF-8 path to UTF-16.  Also, if the absolute equivalent of the
-// path is longer than CreateDirectory can tolerate, make it absolute and
-// prefixed by '\\?\'.
-std::error_code widenPath(const Twine &Path8,
-                          SmallVectorImpl<wchar_t> &Path16) {
-  const size_t MaxDirLen = MAX_PATH - 12; // Must leave room for 8.3 filename.
+// Convert a UTF-8 path to UTF-16. Also, if the absolute equivalent of the path
+// is longer than the limit that the Win32 Unicode File API can tolerate, make
+// it an absolute normalized path prefixed by '\\?\'.
+std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
+                          size_t MaxPathLen) {
+  assert(MaxPathLen <= MAX_PATH);
 
-  // Several operations would convert Path8 to SmallString; more efficient to
-  // do it once up front.
-  SmallString<128> Path8Str;
+  // Several operations would convert Path8 to SmallString; more efficient to do
+  // it once up front.
+  SmallString<MAX_PATH> Path8Str;
   Path8.toVector(Path8Str);
 
-  // If we made this path absolute, how much longer would it get?
+  std::error_code EC = UTF8ToUTF16(Path8Str, Path16);
+  if (EC)
+    return EC;
+
+  const bool IsAbsolute = llvm::sys::path::is_absolute(Path8);
   size_t CurPathLen;
-  if (llvm::sys::path::is_absolute(Twine(Path8Str)))
+  if (IsAbsolute)
     CurPathLen = 0; // No contribution from current_path needed.
   else {
-    CurPathLen = ::GetCurrentDirectoryW(0, NULL);
+    CurPathLen = ::GetCurrentDirectoryW(
+        0, NULL); // Returns the size including the null terminator.
     if (CurPathLen == 0)
       return mapWindowsError(::GetLastError());
   }
 
-  // Would the absolute path be longer than our limit?
-  if ((Path8Str.size() + CurPathLen) >= MaxDirLen &&
-      !Path8Str.startswith("\\\\?\\")) {
-    SmallString<2*MAX_PATH> FullPath("\\\\?\\");
-    if (CurPathLen) {
-      SmallString<80> CurPath;
-      if (std::error_code EC = llvm::sys::fs::current_path(CurPath))
-        return EC;
-      FullPath.append(CurPath);
-    }
-    // Traverse the requested path, canonicalizing . and .. (because the \\?\
-    // prefix is documented to treat them as real components).  Ignore
-    // separators, which can be returned from the iterator if the path has a
-    // drive name.  We don't need to call native() on the result since append()
-    // always attaches preferred_separator.
-    for (llvm::sys::path::const_iterator I = llvm::sys::path::begin(Path8Str),
-                                         E = llvm::sys::path::end(Path8Str);
-                                         I != E; ++I) {
-      if (I->size() == 1 && is_separator((*I)[0]))
-        continue;
-      if (I->size() == 1 && *I == ".")
-        continue;
-      if (I->size() == 2 && *I == "..")
-        llvm::sys::path::remove_filename(FullPath);
-      else
-        llvm::sys::path::append(FullPath, *I);
-    }
-    return UTF8ToUTF16(FullPath, Path16);
+  const char *const LongPathPrefix = "\\\\?\\";
+
+  if ((Path16.size() + CurPathLen) < MaxPathLen ||
+      Path8Str.startswith(LongPathPrefix))
+    return std::error_code();
+
+  if (!IsAbsolute) {
+    if (EC = llvm::sys::fs::make_absolute(Path8Str))
+      return EC;
   }
 
-  // Just use the caller's original path.
-  return UTF8ToUTF16(Path8Str, Path16);
+  // Remove '.' and '..' because long paths treat these as real path components.
+  llvm::sys::path::remove_dots(Path8Str, true);
+
+  const StringRef RootName = llvm::sys::path::root_name(Path8Str);
+  assert(!RootName.empty() &&
+         "Root name cannot be empty for an absolute path!");
+
+  // llvm::sys::path::remove_dots, used above, can leave a '/' after the root
+  // name and long paths must use '\' as the separator.
+  const size_t RootNameSize = RootName.size();
+  if (RootNameSize < Path8Str.size() && Path8Str[RootNameSize] == '/')
+    Path8Str[RootNameSize] = '\\';
+
+  SmallString<2 * MAX_PATH> FullPath(LongPathPrefix);
+  if (RootName[1] != ':') { // Check if UNC.
+    FullPath.append("UNC\\");
+    FullPath.append(Path8Str.begin() + 2, Path8Str.end());
+  } else
+    FullPath.append(Path8Str);
+
+  return UTF8ToUTF16(FullPath, Path16);
 }
-} // end namespace path
+
+} // end namespace windows
 
 namespace fs {
 
@@ -227,7 +233,9 @@ std::error_code create_directory(const Twine &path, bool IgnoreExisting,
                                  perms Perms) {
   SmallVector<wchar_t, 128> path_utf16;
 
-  if (std::error_code ec = widenPath(path, path_utf16))
+  // CreateDirectoryW has a lower maximum path length as it must leave room for
+  // an 8.3 filename.
+  if (std::error_code ec = widenPath(path, path_utf16, MAX_PATH - 12))
     return ec;
 
   if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {

diff  --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc
index f20538e40cc0..48954ba04735 100644
--- a/llvm/lib/Support/Windows/Program.inc
+++ b/llvm/lib/Support/Windows/Program.inc
@@ -151,7 +151,7 @@ static HANDLE RedirectIO(Optional<StringRef> Path, int fd,
     if (windows::UTF8ToUTF16(fname, fnameUnicode))
       return INVALID_HANDLE_VALUE;
   } else {
-    if (path::widenPath(fname, fnameUnicode))
+    if (sys::windows::widenPath(fname, fnameUnicode))
       return INVALID_HANDLE_VALUE;
   }
   h = CreateFileW(fnameUnicode.data(), fd ? GENERIC_WRITE : GENERIC_READ,
@@ -263,7 +263,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
   fflush(stderr);
 
   SmallVector<wchar_t, MAX_PATH> ProgramUtf16;
-  if (std::error_code ec = path::widenPath(Program, ProgramUtf16)) {
+  if (std::error_code ec = sys::windows::widenPath(Program, ProgramUtf16)) {
     SetLastError(ec.value());
     MakeErrMsg(ErrMsg,
                std::string("Unable to convert application name to UTF-16"));

diff  --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp
index 601223b11ab4..affba601a8bc 100644
--- a/llvm/unittests/Support/Path.cpp
+++ b/llvm/unittests/Support/Path.cpp
@@ -28,6 +28,7 @@
 #ifdef _WIN32
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/Chrono.h"
+#include "llvm/Support/Windows/WindowsSupport.h"
 #include <windows.h>
 #include <winerror.h>
 #endif
@@ -1875,4 +1876,74 @@ TEST_F(FileSystemTest, permissions) {
 #endif
 }
 
+#ifdef _WIN32
+TEST_F(FileSystemTest, widenPath) {
+  const std::wstring LongPathPrefix(L"\\\\?\\");
+
+  // Test that the length limit is checked against the UTF-16 length and not the
+  // UTF-8 length.
+  std::string Input("C:\\foldername\\");
+  const std::string Pi("\xcf\x80"); // UTF-8 lower case pi.
+  // Add Pi up to the MAX_PATH limit.
+  const size_t NumChars = MAX_PATH - Input.size() - 1;
+  for (size_t i = 0; i < NumChars; ++i)
+    Input += Pi;
+  // Check that UTF-8 length already exceeds MAX_PATH.
+  EXPECT_TRUE(Input.size() > MAX_PATH);
+  SmallVector<wchar_t, MAX_PATH + 16> Result;
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  // Result should not start with the long path prefix.
+  EXPECT_TRUE(std::wmemcmp(Result.data(), LongPathPrefix.c_str(),
+                           LongPathPrefix.size()) != 0);
+  EXPECT_EQ(Result.size(), MAX_PATH - 1);
+
+  // Add another Pi to exceed the MAX_PATH limit.
+  Input += Pi;
+  // Construct the expected result.
+  SmallVector<wchar_t, MAX_PATH + 16> Expected;
+  ASSERT_NO_ERROR(windows::UTF8ToUTF16(Input, Expected));
+  Expected.insert(Expected.begin(), LongPathPrefix.begin(),
+                  LongPathPrefix.end());
+
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  EXPECT_EQ(Result, Expected);
+
+  // Test that UNC paths are handled correctly.
+  const std::string ShareName("\\\\sharename\\");
+  const std::string FileName("\\filename");
+  // Initialize directory name so that the input is within the MAX_PATH limit.
+  const char DirChar = 'x';
+  std::string DirName(MAX_PATH - ShareName.size() - FileName.size() - 1,
+                      DirChar);
+
+  Input = ShareName + DirName + FileName;
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  // Result should not start with the long path prefix.
+  EXPECT_TRUE(std::wmemcmp(Result.data(), LongPathPrefix.c_str(),
+                           LongPathPrefix.size()) != 0);
+  EXPECT_EQ(Result.size(), MAX_PATH - 1);
+
+  // Extend the directory name so the input exceeds the MAX_PATH limit.
+  DirName += DirChar;
+  Input = ShareName + DirName + FileName;
+  // Construct the expected result.
+  ASSERT_NO_ERROR(windows::UTF8ToUTF16(StringRef(Input).substr(2), Expected));
+  const std::wstring UNCPrefix(LongPathPrefix + L"UNC\\");
+  Expected.insert(Expected.begin(), UNCPrefix.begin(), UNCPrefix.end());
+
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  EXPECT_EQ(Result, Expected);
+
+  // Check that Unix separators are handled correctly.
+  std::replace(Input.begin(), Input.end(), '\\', '/');
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  EXPECT_EQ(Result, Expected);
+
+  // Check the removal of "dots".
+  Input = ShareName + DirName + "\\.\\foo\\.\\.." + FileName;
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  EXPECT_EQ(Result, Expected);
+}
+#endif
+
 } // anonymous namespace


        


More information about the llvm-commits mailing list