[llvm] fb5fd74 - Revert "Optimize path::remove_dots"

Nico Weber via llvm-commits llvm-commits at lists.llvm.org
Sun May 3 09:47:36 PDT 2020


Author: Nico Weber
Date: 2020-05-03T12:46:46-04:00
New Revision: fb5fd74685e728b1d5e68d33e9842bcd734b98e6

URL: https://github.com/llvm/llvm-project/commit/fb5fd74685e728b1d5e68d33e9842bcd734b98e6
DIFF: https://github.com/llvm/llvm-project/commit/fb5fd74685e728b1d5e68d33e9842bcd734b98e6.diff

LOG: Revert "Optimize path::remove_dots"

This reverts commit 53913a65b408ade2956061b4c0aaed6bba907403.
Breaks VFSFromYAMLTest.DirectoryIterationSameDirMultipleEntries
in SupportTests on non-Windows.

Added: 
    

Modified: 
    llvm/lib/Support/Path.cpp
    llvm/lib/Support/Windows/Path.inc
    llvm/unittests/Support/Path.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
index f5ab2504a83f..6f065b9c5a3c 100644
--- a/llvm/lib/Support/Path.cpp
+++ b/llvm/lib/Support/Path.cpp
@@ -684,66 +684,43 @@ StringRef remove_leading_dotslash(StringRef Path, Style style) {
   return Path;
 }
 
-// Remove path traversal components ("." and "..") when possible, and
-// canonicalize slashes.
-bool remove_dots(SmallVectorImpl<char> &the_path, bool remove_dot_dot,
-                 Style style) {
-  style = real_style(style);
-  StringRef remaining(the_path.data(), the_path.size());
-  bool needs_change = false;
+static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot,
+                                    Style style) {
   SmallVector<StringRef, 16> components;
 
-  // Consume the root path, if present.
-  StringRef root = path::root_path(remaining, style);
-  bool absolute = !root.empty();
-  if (absolute)
-    remaining = remaining.drop_front(root.size());
-
-  // Loop over path components manually. This makes it easier to detect
-  // non-preferred slashes and double separators that must be canonicalized.
-  while (!remaining.empty()) {
-    size_t next_slash = remaining.find_first_of(separators(style));
-    if (next_slash == StringRef::npos)
-      next_slash = remaining.size();
-    StringRef component = remaining.take_front(next_slash);
-    remaining = remaining.drop_front(next_slash);
-
-    // Eat the slash, and check if it is the preferred separator.
-    if (!remaining.empty()) {
-      needs_change |= remaining.front() != preferred_separator(style);
-      remaining = remaining.drop_front();
-    }
-
-    // Check for path traversal components or double separators.
-    if (component.empty() || component == ".") {
-      needs_change = true;
-    } else if (remove_dot_dot && component == "..") {
-      needs_change = true;
-      // Do not allow ".." to remove the root component. If this is the
-      // beginning of a relative path, keep the ".." component.
+  // Skip the root path, then look for traversal in the components.
+  StringRef rel = path::relative_path(path, style);
+  for (StringRef C :
+       llvm::make_range(path::begin(rel, style), path::end(rel))) {
+    if (C == ".")
+      continue;
+    // Leading ".." will remain in the path unless it's at the root.
+    if (remove_dot_dot && C == "..") {
       if (!components.empty() && components.back() != "..") {
         components.pop_back();
-      } else if (!absolute) {
-        components.push_back(component);
+        continue;
       }
-    } else {
-      components.push_back(component);
+      if (path::is_absolute(path, style))
+        continue;
     }
+    components.push_back(C);
   }
 
-  // Avoid rewriting the path unless we have to.
-  if (!needs_change)
+  SmallString<256> buffer = path::root_path(path, style);
+  for (StringRef C : components)
+    path::append(buffer, style, C);
+  return buffer;
+}
+
+bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot,
+                 Style style) {
+  StringRef p(path.data(), path.size());
+
+  SmallString<256> result = remove_dots(p, remove_dot_dot, style);
+  if (result == path)
     return false;
 
-  SmallString<256> buffer = root;
-  if (!components.empty()) {
-    buffer += components[0];
-    for (StringRef C : makeArrayRef(components).drop_front()) {
-      buffer += preferred_separator(style);
-      buffer += C;
-    }
-  }
-  the_path.swap(buffer);
+  path.swap(result);
   return true;
 }
 

diff  --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index ec62e656ddf0..2e1488479095 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -101,13 +101,18 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
   }
 
   // Remove '.' and '..' because long paths treat these as real path components.
-  llvm::sys::path::native(Path8Str, path::Style::windows);
   llvm::sys::path::remove_dots(Path8Str, true);
 
   const StringRef RootName = llvm::sys::path::root_name(Path8Str);
   assert(!RootName.empty() &&
          "Root name cannot be empty for an absolute path!");
 
+  // llvm::sys::path::remove_dots, used above, can leave a '/' after the root
+  // name and long paths must use '\' as the separator.
+  const size_t RootNameSize = RootName.size();
+  if (RootNameSize < Path8Str.size() && Path8Str[RootNameSize] == '/')
+    Path8Str[RootNameSize] = '\\';
+
   SmallString<2 * MAX_PATH> FullPath(LongPathPrefix);
   if (RootName[1] != ':') { // Check if UNC.
     FullPath.append("UNC\\");

diff  --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp
index 8c530487c154..3030fb2ebb2e 100644
--- a/llvm/unittests/Support/Path.cpp
+++ b/llvm/unittests/Support/Path.cpp
@@ -1253,22 +1253,6 @@ TEST(Support, RemoveDots) {
             remove_dots("..\\a\\b\\..\\c", true, path::Style::windows));
   EXPECT_EQ("..\\..\\a\\c",
             remove_dots("..\\..\\a\\b\\..\\c", true, path::Style::windows));
-  EXPECT_EQ("C:\\a\\c", remove_dots("C:\\foo\\bar//..\\..\\a\\c", true,
-                                    path::Style::windows));
-
-  // FIXME: These leading forward slashes are emergent behavior. VFS depends on
-  // this behavior now.
-  EXPECT_EQ("C:/bar",
-            remove_dots("C:/foo/../bar", true, path::Style::windows));
-  EXPECT_EQ("C:/foo\\bar",
-            remove_dots("C:/foo/bar", true, path::Style::windows));
-  EXPECT_EQ("C:/foo\\bar",
-            remove_dots("C:/foo\\bar", true, path::Style::windows));
-  EXPECT_EQ("/", remove_dots("/", true, path::Style::windows));
-  EXPECT_EQ("C:/", remove_dots("C:/", true, path::Style::windows));
-
-  // A double separator is rewritten.
-  EXPECT_EQ("C:/foo\\bar", remove_dots("C:/foo//bar", true, path::Style::windows));
 
   SmallString<64> Path1(".\\.\\c");
   EXPECT_TRUE(path::remove_dots(Path1, true, path::Style::windows));
@@ -1287,7 +1271,6 @@ TEST(Support, RemoveDots) {
   EXPECT_EQ("/a/c", remove_dots("/../../a/c", true, path::Style::posix));
   EXPECT_EQ("/a/c",
             remove_dots("/../a/b//../././/c", true, path::Style::posix));
-  EXPECT_EQ("/", remove_dots("/", true, path::Style::posix));
 
   SmallString<64> Path2("././c");
   EXPECT_TRUE(path::remove_dots(Path2, true, path::Style::posix));


        


More information about the llvm-commits mailing list