[llvm] e6f6c55 - [Support] Improve Windows widenPath and add support for long UNC paths
Andrew Ng via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 19 06:01:58 PDT 2020
Author: Andrew Ng
Date: 2020-03-19T13:00:21Z
New Revision: e6f6c551213a65b66e63f3c5d84b85d2b99097f3
URL: https://github.com/llvm/llvm-project/commit/e6f6c551213a65b66e63f3c5d84b85d2b99097f3
DIFF: https://github.com/llvm/llvm-project/commit/e6f6c551213a65b66e63f3c5d84b85d2b99097f3.diff
LOG: [Support] Improve Windows widenPath and add support for long UNC paths
Check the path length limit against the length of the UTF-16 version of
the input rather than the UTF-8 equivalent, as the UTF-16 length may be
shorter. Move widenPath from the llvm::sys::path namespace in Path.h to
the llvm::sys::windows namespace in WindowsSupport.h. Only use the
reduced path length limit for create directory. Canonicalize using
sys::path::remove_dots().
Differential Revision: https://reviews.llvm.org/D75372
Added:
Modified:
llvm/include/llvm/Support/Path.h
llvm/include/llvm/Support/Windows/WindowsSupport.h
llvm/lib/Support/Windows/Path.inc
llvm/lib/Support/Windows/Program.inc
llvm/unittests/Support/Path.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h
index 97955f882d51..f0b2810cd7a9 100644
--- a/llvm/include/llvm/Support/Path.h
+++ b/llvm/include/llvm/Support/Path.h
@@ -468,10 +468,6 @@ StringRef remove_leading_dotslash(StringRef path, Style style = Style::native);
bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false,
Style style = Style::native);
-#if defined(_WIN32)
-std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16);
-#endif
-
} // end namespace path
} // end namespace sys
} // end namespace llvm
diff --git a/llvm/include/llvm/Support/Windows/WindowsSupport.h b/llvm/include/llvm/Support/Windows/WindowsSupport.h
index bb7e79b86018..bd5a90c2c3f0 100644
--- a/llvm/include/llvm/Support/Windows/WindowsSupport.h
+++ b/llvm/include/llvm/Support/Windows/WindowsSupport.h
@@ -236,6 +236,12 @@ namespace windows {
// UTF-8 regardless of the current code page setting.
std::error_code GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
BumpPtrAllocator &Alloc);
+
+/// Convert UTF-8 path to a suitable UTF-16 path for use with the Win32 Unicode
+/// File API.
+std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
+ size_t MaxPathLen = MAX_PATH);
+
} // end namespace windows
} // end namespace sys
} // end namespace llvm.
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index d634c123fbdc..ee199aa1d4b1 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -47,7 +47,7 @@ using namespace llvm;
using llvm::sys::windows::UTF8ToUTF16;
using llvm::sys::windows::CurCPToUTF16;
using llvm::sys::windows::UTF16ToUTF8;
-using llvm::sys::path::widenPath;
+using llvm::sys::windows::widenPath;
static bool is_separator(const wchar_t value) {
switch (value) {
@@ -61,64 +61,70 @@ static bool is_separator(const wchar_t value) {
namespace llvm {
namespace sys {
-namespace path {
+namespace windows {
-// Convert a UTF-8 path to UTF-16. Also, if the absolute equivalent of the
-// path is longer than CreateDirectory can tolerate, make it absolute and
-// prefixed by '\\?\'.
-std::error_code widenPath(const Twine &Path8,
- SmallVectorImpl<wchar_t> &Path16) {
- const size_t MaxDirLen = MAX_PATH - 12; // Must leave room for 8.3 filename.
+// Convert a UTF-8 path to UTF-16. Also, if the absolute equivalent of the path
+// is longer than the limit that the Win32 Unicode File API can tolerate, make
+// it an absolute normalized path prefixed by '\\?\'.
+std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
+ size_t MaxPathLen) {
+ assert(MaxPathLen <= MAX_PATH);
- // Several operations would convert Path8 to SmallString; more efficient to
- // do it once up front.
- SmallString<128> Path8Str;
+ // Several operations would convert Path8 to SmallString; more efficient to do
+ // it once up front.
+ SmallString<MAX_PATH> Path8Str;
Path8.toVector(Path8Str);
- // If we made this path absolute, how much longer would it get?
+ std::error_code EC = UTF8ToUTF16(Path8Str, Path16);
+ if (EC)
+ return EC;
+
+ const bool IsAbsolute = llvm::sys::path::is_absolute(Path8);
size_t CurPathLen;
- if (llvm::sys::path::is_absolute(Twine(Path8Str)))
+ if (IsAbsolute)
CurPathLen = 0; // No contribution from current_path needed.
else {
- CurPathLen = ::GetCurrentDirectoryW(0, NULL);
+ CurPathLen = ::GetCurrentDirectoryW(
+ 0, NULL); // Returns the size including the null terminator.
if (CurPathLen == 0)
return mapWindowsError(::GetLastError());
}
- // Would the absolute path be longer than our limit?
- if ((Path8Str.size() + CurPathLen) >= MaxDirLen &&
- !Path8Str.startswith("\\\\?\\")) {
- SmallString<2*MAX_PATH> FullPath("\\\\?\\");
- if (CurPathLen) {
- SmallString<80> CurPath;
- if (std::error_code EC = llvm::sys::fs::current_path(CurPath))
- return EC;
- FullPath.append(CurPath);
- }
- // Traverse the requested path, canonicalizing . and .. (because the \\?\
- // prefix is documented to treat them as real components). Ignore
- // separators, which can be returned from the iterator if the path has a
- // drive name. We don't need to call native() on the result since append()
- // always attaches preferred_separator.
- for (llvm::sys::path::const_iterator I = llvm::sys::path::begin(Path8Str),
- E = llvm::sys::path::end(Path8Str);
- I != E; ++I) {
- if (I->size() == 1 && is_separator((*I)[0]))
- continue;
- if (I->size() == 1 && *I == ".")
- continue;
- if (I->size() == 2 && *I == "..")
- llvm::sys::path::remove_filename(FullPath);
- else
- llvm::sys::path::append(FullPath, *I);
- }
- return UTF8ToUTF16(FullPath, Path16);
+ const char *const LongPathPrefix = "\\\\?\\";
+
+ if ((Path16.size() + CurPathLen) < MaxPathLen ||
+ Path8Str.startswith(LongPathPrefix))
+ return std::error_code();
+
+ if (!IsAbsolute) {
+ if (EC = llvm::sys::fs::make_absolute(Path8Str))
+ return EC;
}
- // Just use the caller's original path.
- return UTF8ToUTF16(Path8Str, Path16);
+ // Remove '.' and '..' because long paths treat these as real path components.
+ llvm::sys::path::remove_dots(Path8Str, true);
+
+ const StringRef RootName = llvm::sys::path::root_name(Path8Str);
+ assert(!RootName.empty() &&
+ "Root name cannot be empty for an absolute path!");
+
+ // llvm::sys::path::remove_dots, used above, can leave a '/' after the root
+ // name and long paths must use '\' as the separator.
+ const size_t RootNameSize = RootName.size();
+ if (RootNameSize < Path8Str.size() && Path8Str[RootNameSize] == '/')
+ Path8Str[RootNameSize] = '\\';
+
+ SmallString<2 * MAX_PATH> FullPath(LongPathPrefix);
+ if (RootName[1] != ':') { // Check if UNC.
+ FullPath.append("UNC\\");
+ FullPath.append(Path8Str.begin() + 2, Path8Str.end());
+ } else
+ FullPath.append(Path8Str);
+
+ return UTF8ToUTF16(FullPath, Path16);
}
-} // end namespace path
+
+} // end namespace windows
namespace fs {
@@ -227,7 +233,9 @@ std::error_code create_directory(const Twine &path, bool IgnoreExisting,
perms Perms) {
SmallVector<wchar_t, 128> path_utf16;
- if (std::error_code ec = widenPath(path, path_utf16))
+ // CreateDirectoryW has a lower maximum path length as it must leave room for
+ // an 8.3 filename.
+ if (std::error_code ec = widenPath(path, path_utf16, MAX_PATH - 12))
return ec;
if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {
diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc
index f20538e40cc0..48954ba04735 100644
--- a/llvm/lib/Support/Windows/Program.inc
+++ b/llvm/lib/Support/Windows/Program.inc
@@ -151,7 +151,7 @@ static HANDLE RedirectIO(Optional<StringRef> Path, int fd,
if (windows::UTF8ToUTF16(fname, fnameUnicode))
return INVALID_HANDLE_VALUE;
} else {
- if (path::widenPath(fname, fnameUnicode))
+ if (sys::windows::widenPath(fname, fnameUnicode))
return INVALID_HANDLE_VALUE;
}
h = CreateFileW(fnameUnicode.data(), fd ? GENERIC_WRITE : GENERIC_READ,
@@ -263,7 +263,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
fflush(stderr);
SmallVector<wchar_t, MAX_PATH> ProgramUtf16;
- if (std::error_code ec = path::widenPath(Program, ProgramUtf16)) {
+ if (std::error_code ec = sys::windows::widenPath(Program, ProgramUtf16)) {
SetLastError(ec.value());
MakeErrMsg(ErrMsg,
std::string("Unable to convert application name to UTF-16"));
diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp
index 601223b11ab4..affba601a8bc 100644
--- a/llvm/unittests/Support/Path.cpp
+++ b/llvm/unittests/Support/Path.cpp
@@ -28,6 +28,7 @@
#ifdef _WIN32
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Chrono.h"
+#include "llvm/Support/Windows/WindowsSupport.h"
#include <windows.h>
#include <winerror.h>
#endif
@@ -1875,4 +1876,74 @@ TEST_F(FileSystemTest, permissions) {
#endif
}
+#ifdef _WIN32
+TEST_F(FileSystemTest, widenPath) {
+ const std::wstring LongPathPrefix(L"\\\\?\\");
+
+ // Test that the length limit is checked against the UTF-16 length and not the
+ // UTF-8 length.
+ std::string Input("C:\\foldername\\");
+ const std::string Pi("\xcf\x80"); // UTF-8 lower case pi.
+ // Add Pi up to the MAX_PATH limit.
+ const size_t NumChars = MAX_PATH - Input.size() - 1;
+ for (size_t i = 0; i < NumChars; ++i)
+ Input += Pi;
+ // Check that UTF-8 length already exceeds MAX_PATH.
+ EXPECT_TRUE(Input.size() > MAX_PATH);
+ SmallVector<wchar_t, MAX_PATH + 16> Result;
+ ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+ // Result should not start with the long path prefix.
+ EXPECT_TRUE(std::wmemcmp(Result.data(), LongPathPrefix.c_str(),
+ LongPathPrefix.size()) != 0);
+ EXPECT_EQ(Result.size(), MAX_PATH - 1);
+
+ // Add another Pi to exceed the MAX_PATH limit.
+ Input += Pi;
+ // Construct the expected result.
+ SmallVector<wchar_t, MAX_PATH + 16> Expected;
+ ASSERT_NO_ERROR(windows::UTF8ToUTF16(Input, Expected));
+ Expected.insert(Expected.begin(), LongPathPrefix.begin(),
+ LongPathPrefix.end());
+
+ ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+ EXPECT_EQ(Result, Expected);
+
+ // Test that UNC paths are handled correctly.
+ const std::string ShareName("\\\\sharename\\");
+ const std::string FileName("\\filename");
+ // Initialize directory name so that the input is within the MAX_PATH limit.
+ const char DirChar = 'x';
+ std::string DirName(MAX_PATH - ShareName.size() - FileName.size() - 1,
+ DirChar);
+
+ Input = ShareName + DirName + FileName;
+ ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+ // Result should not start with the long path prefix.
+ EXPECT_TRUE(std::wmemcmp(Result.data(), LongPathPrefix.c_str(),
+ LongPathPrefix.size()) != 0);
+ EXPECT_EQ(Result.size(), MAX_PATH - 1);
+
+ // Extend the directory name so the input exceeds the MAX_PATH limit.
+ DirName += DirChar;
+ Input = ShareName + DirName + FileName;
+ // Construct the expected result.
+ ASSERT_NO_ERROR(windows::UTF8ToUTF16(StringRef(Input).substr(2), Expected));
+ const std::wstring UNCPrefix(LongPathPrefix + L"UNC\\");
+ Expected.insert(Expected.begin(), UNCPrefix.begin(), UNCPrefix.end());
+
+ ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+ EXPECT_EQ(Result, Expected);
+
+ // Check that Unix separators are handled correctly.
+ std::replace(Input.begin(), Input.end(), '\\', '/');
+ ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+ EXPECT_EQ(Result, Expected);
+
+ // Check the removal of "dots".
+ Input = ShareName + DirName + "\\.\\foo\\.\\.." + FileName;
+ ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+ EXPECT_EQ(Result, Expected);
+}
+#endif
+
} // anonymous namespace
More information about the llvm-commits
mailing list