r263617 - Reapply: [VFS] Add support for handling path traversals
Bruno Cardoso Lopes via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 15 21:39:39 PDT 2016
Author: bruno
Date: Tue Mar 15 23:39:38 2016
New Revision: 263617
URL: http://llvm.org/viewvc/llvm-project?rev=263617&view=rev
Log:
Reapply: [VFS] Add support for handling path traversals
This is originally r261551, reverted because of windows bots failing on
unittests. Change the current behavior to do not handle path traversals
on windows.
Handle ".", ".." and "./" with trailing slashes while collecting files
to be dumped into the vfs overlay directory.
Include the support for symlinks into components. Given the path:
/install-dir/bin/../lib/clang/3.8.0/include/altivec.h, if "bin"
component is a symlink, it's not safe to use `path::remove_dots` here,
and `realpath` is used to get the right answer. Since `realpath`
is expensive, we only do it at collecting time (which only happens
during the crash reproducer) and cache the base directory for fast lookups.
Overall, this makes the input to the VFS YAML file to be canonicalized
to never contain traversal components.
Differential Revision: http://reviews.llvm.org/D17104
rdar://problem/24499339
Added:
cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m
cfe/trunk/test/Modules/crash-vfs-path-traversal.m
Modified:
cfe/trunk/lib/Basic/VirtualFileSystem.cpp
cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp
Modified: cfe/trunk/lib/Basic/VirtualFileSystem.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/VirtualFileSystem.cpp?rev=263617&r1=263616&r2=263617&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/VirtualFileSystem.cpp (original)
+++ cfe/trunk/lib/Basic/VirtualFileSystem.cpp Tue Mar 15 23:39:38 2016
@@ -112,6 +112,20 @@ bool FileSystem::exists(const Twine &Pat
return Status && Status->exists();
}
+#ifndef NDEBUG
+static bool isTraversalComponent(StringRef Component) {
+ return Component.equals("..") || Component.equals(".");
+}
+
+static bool pathHasTraversal(StringRef Path) {
+ using namespace llvm::sys;
+ for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path)))
+ if (isTraversalComponent(Comp))
+ return true;
+ return false;
+}
+#endif
+
//===-----------------------------------------------------------------------===/
// RealFileSystem implementation
//===-----------------------------------------------------------------------===/
@@ -819,6 +833,16 @@ class RedirectingFileSystem : public vfs
bool UseExternalNames;
/// @}
+ /// Virtual file paths and external files could be canonicalized without "..",
+ /// "." and "./" in their paths. FIXME: some unittests currently fail on
+ /// win32 when using remove_dots and remove_leading_dotslash on paths.
+ bool UseCanonicalizedPaths =
+#ifdef LLVM_ON_WIN32
+ false;
+#else
+ true;
+#endif
+
friend class RedirectingFileSystemParser;
private:
@@ -954,7 +978,7 @@ class RedirectingFileSystemParser {
return true;
}
- std::unique_ptr<Entry> parseEntry(yaml::Node *N) {
+ std::unique_ptr<Entry> parseEntry(yaml::Node *N, RedirectingFileSystem *FS) {
yaml::MappingNode *M = dyn_cast<yaml::MappingNode>(N);
if (!M) {
error(N, "expected mapping node for file or directory entry");
@@ -994,7 +1018,17 @@ class RedirectingFileSystemParser {
if (Key == "name") {
if (!parseScalarString(I->getValue(), Value, Buffer))
return nullptr;
- Name = Value;
+
+ if (FS->UseCanonicalizedPaths) {
+ SmallString<256> Path(Value);
+ // Guarantee that old YAML files containing paths with ".." and "."
+ // are properly canonicalized before read into the VFS.
+ Path = sys::path::remove_leading_dotslash(Path);
+ sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ Name = Path.str();
+ } else {
+ Name = Value;
+ }
} else if (Key == "type") {
if (!parseScalarString(I->getValue(), Value, Buffer))
return nullptr;
@@ -1024,7 +1058,7 @@ class RedirectingFileSystemParser {
for (yaml::SequenceNode::iterator I = Contents->begin(),
E = Contents->end();
I != E; ++I) {
- if (std::unique_ptr<Entry> E = parseEntry(&*I))
+ if (std::unique_ptr<Entry> E = parseEntry(&*I, FS))
EntryArrayContents.push_back(std::move(E));
else
return nullptr;
@@ -1038,7 +1072,16 @@ class RedirectingFileSystemParser {
HasContents = true;
if (!parseScalarString(I->getValue(), Value, Buffer))
return nullptr;
- ExternalContentsPath = Value;
+ if (FS->UseCanonicalizedPaths) {
+ SmallString<256> Path(Value);
+ // Guarantee that old YAML files containing paths with ".." and "."
+ // are properly canonicalized before read into the VFS.
+ Path = sys::path::remove_leading_dotslash(Path);
+ sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ ExternalContentsPath = Path.str();
+ } else {
+ ExternalContentsPath = Value;
+ }
} else if (Key == "use-external-name") {
bool Val;
if (!parseScalarBool(I->getValue(), Val))
@@ -1149,7 +1192,7 @@ public:
for (yaml::SequenceNode::iterator I = Roots->begin(), E = Roots->end();
I != E; ++I) {
- if (std::unique_ptr<Entry> E = parseEntry(&*I))
+ if (std::unique_ptr<Entry> E = parseEntry(&*I, FS))
FS->Roots.push_back(std::move(E));
else
return false;
@@ -1228,6 +1271,14 @@ ErrorOr<Entry *> RedirectingFileSystem::
if (std::error_code EC = makeAbsolute(Path))
return EC;
+ // Canonicalize path by removing ".", "..", "./", etc components. This is
+ // a VFS request, do bot bother about symlinks in the path components
+ // but canonicalize in order to perform the correct entry search.
+ if (UseCanonicalizedPaths) {
+ Path = sys::path::remove_leading_dotslash(Path);
+ sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ }
+
if (Path.empty())
return make_error_code(llvm::errc::invalid_argument);
@@ -1244,10 +1295,17 @@ ErrorOr<Entry *> RedirectingFileSystem::
ErrorOr<Entry *>
RedirectingFileSystem::lookupPath(sys::path::const_iterator Start,
sys::path::const_iterator End, Entry *From) {
+#ifndef LLVM_ON_WIN32
+ assert(!isTraversalComponent(*Start) &&
+ !isTraversalComponent(From->getName()) &&
+ "Paths should not contain traversal components");
+#else
+ // FIXME: this is here to support windows, remove it once canonicalized
+ // paths become globally default.
if (Start->equals("."))
++Start;
+#endif
- // FIXME: handle ..
if (CaseSensitive ? !Start->equals(From->getName())
: !Start->equals_lower(From->getName()))
// failure to match
@@ -1366,16 +1424,6 @@ UniqueID vfs::getNextVirtualUniqueID() {
return UniqueID(std::numeric_limits<uint64_t>::max(), ID);
}
-#ifndef NDEBUG
-static bool pathHasTraversal(StringRef Path) {
- using namespace llvm::sys;
- for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path)))
- if (Comp == "." || Comp == "..")
- return true;
- return false;
-}
-#endif
-
void YAMLVFSWriter::addFileMapping(StringRef VirtualPath, StringRef RealPath) {
assert(sys::path::is_absolute(VirtualPath) && "virtual path not absolute");
assert(sys::path::is_absolute(RealPath) && "real path not absolute");
Modified: cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp?rev=263617&r1=263616&r2=263617&view=diff
==============================================================================
--- cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp (original)
+++ cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp Tue Mar 15 23:39:38 2016
@@ -13,8 +13,9 @@
#include "clang/Frontend/Utils.h"
#include "clang/Serialization/ASTReader.h"
-#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Config/config.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -25,7 +26,9 @@ namespace {
/// Private implementation for ModuleDependencyCollector
class ModuleDependencyListener : public ASTReaderListener {
ModuleDependencyCollector &Collector;
+ llvm::StringMap<std::string> SymLinkMap;
+ bool getRealPath(StringRef SrcPath, SmallVectorImpl<char> &Result);
std::error_code copyToRoot(StringRef Src);
public:
ModuleDependencyListener(ModuleDependencyCollector &Collector)
@@ -57,6 +60,48 @@ void ModuleDependencyCollector::writeFil
VFSWriter.write(OS);
}
+// TODO: move this to Support/Path.h?
+static bool real_path(StringRef SrcPath, SmallVectorImpl<char> &RealPath) {
+#ifdef HAVE_REALPATH
+ char CanonicalPath[PATH_MAX];
+
+ // TODO: emit a warning in case this fails...?
+ if (!realpath(SrcPath.str().c_str(), CanonicalPath))
+ return false;
+
+ SmallString<256> RPath(CanonicalPath);
+ RealPath.swap(RPath);
+ return true;
+#else
+ // FIXME: Add support for systems without realpath.
+ return false;
+#endif
+}
+
+bool ModuleDependencyListener::getRealPath(StringRef SrcPath,
+ SmallVectorImpl<char> &Result) {
+ using namespace llvm::sys;
+ SmallString<256> RealPath;
+ StringRef FileName = path::filename(SrcPath);
+ std::string Dir = path::parent_path(SrcPath).str();
+ auto DirWithSymLink = SymLinkMap.find(Dir);
+
+ // Use real_path to fix any symbolic link component present in a path.
+ // Computing the real path is expensive, cache the search through the
+ // parent path directory.
+ if (DirWithSymLink == SymLinkMap.end()) {
+ if (!real_path(Dir, RealPath))
+ return false;
+ SymLinkMap[Dir] = RealPath.str();
+ } else {
+ RealPath = DirWithSymLink->second;
+ }
+
+ path::append(RealPath, FileName);
+ Result.swap(RealPath);
+ return true;
+}
+
std::error_code ModuleDependencyListener::copyToRoot(StringRef Src) {
using namespace llvm::sys;
@@ -65,22 +110,42 @@ std::error_code ModuleDependencyListener
fs::make_absolute(AbsoluteSrc);
// Canonicalize to a native path to avoid mixed separator styles.
path::native(AbsoluteSrc);
- // TODO: We probably need to handle .. as well as . in order to have valid
- // input to the YAMLVFSWriter.
- path::remove_dots(AbsoluteSrc);
+ // Remove redundant leading "./" pieces and consecutive separators.
+ AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc);
+
+ // Canonicalize path by removing "..", "." components.
+ SmallString<256> CanonicalPath = AbsoluteSrc;
+ path::remove_dots(CanonicalPath, /*remove_dot_dot=*/true);
+
+ // If a ".." component is present after a symlink component, remove_dots may
+ // lead to the wrong real destination path. Let the source be canonicalized
+ // like that but make sure the destination uses the real path.
+ bool HasDotDotInPath =
+ std::count(path::begin(AbsoluteSrc), path::end(AbsoluteSrc), "..") > 0;
+ SmallString<256> RealPath;
+ bool HasRemovedSymlinkComponent = HasDotDotInPath &&
+ getRealPath(AbsoluteSrc, RealPath) &&
+ !StringRef(CanonicalPath).equals(RealPath);
// Build the destination path.
SmallString<256> Dest = Collector.getDest();
- path::append(Dest, path::relative_path(AbsoluteSrc));
+ path::append(Dest, path::relative_path(HasRemovedSymlinkComponent ? RealPath
+ : CanonicalPath));
// Copy the file into place.
if (std::error_code EC = fs::create_directories(path::parent_path(Dest),
/*IgnoreExisting=*/true))
return EC;
- if (std::error_code EC = fs::copy_file(AbsoluteSrc, Dest))
+ if (std::error_code EC = fs::copy_file(
+ HasRemovedSymlinkComponent ? RealPath : CanonicalPath, Dest))
return EC;
- // Use the absolute path under the root for the file mapping.
- Collector.addFileMapping(AbsoluteSrc, Dest);
+
+ // Use the canonical path under the root for the file mapping. Also create
+ // an additional entry for the real path.
+ Collector.addFileMapping(CanonicalPath, Dest);
+ if (HasRemovedSymlinkComponent)
+ Collector.addFileMapping(RealPath, Dest);
+
return std::error_code();
}
Added: cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m?rev=263617&view=auto
==============================================================================
--- cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m (added)
+++ cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m Tue Mar 15 23:39:38 2016
@@ -0,0 +1,72 @@
+// REQUIRES: crash-recovery, shell
+
+// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it?
+// XFAIL: mingw32
+
+// Test that clang is capable of collecting the right header files in the
+// crash reproducer if there's a symbolic link component in the path.
+
+// RUN: rm -rf %t
+// RUN: mkdir -p %t/i %t/m %t %t/sysroot
+// RUN: cp -a %S/Inputs/System/usr %t/i/
+// RUN: ln -s include/tcl-private %t/i/usr/x
+
+// RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \
+// RUN: %clang -fsyntax-only %s -I %/t/i -isysroot %/t/sysroot/ \
+// RUN: -fmodules -fmodules-cache-path=%t/m/ 2>&1 | FileCheck %s
+
+// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crash-vfs-*.m
+// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crash-vfs-*.sh
+// RUN: FileCheck --check-prefix=CHECKYAML %s -input-file \
+// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml
+// RUN: find %t/crash-vfs-*.cache/vfs | \
+// RUN: grep "usr/include/stdio.h" | count 1
+
+#include "usr/x/../stdio.h"
+
+// CHECK: Preprocessed source(s) and associated run script(s) are located at:
+// CHECK-NEXT: note: diagnostic msg: {{.*}}.m
+// CHECK-NEXT: note: diagnostic msg: {{.*}}.cache
+
+// CHECKSRC: @import cstd.stdio;
+
+// CHECKSH: # Crash reproducer
+// CHECKSH-NEXT: # Driver args: "-fsyntax-only"
+// CHECKSH-NEXT: # Original command: {{.*$}}
+// CHECKSH-NEXT: "-cc1"
+// CHECKSH: "-isysroot" "{{[^"]*}}/sysroot/"
+// CHECKSH-NOT: "-fmodules-cache-path="
+// CHECKSH: "crash-vfs-{{[^ ]*}}.m"
+// CHECKSH: "-ivfsoverlay" "crash-vfs-{{[^ ]*}}.cache/vfs/vfs.yaml"
+
+// CHECKYAML: 'type': 'directory'
+// CHECKYAML: 'name': "{{[^ ]*}}/i/usr/include",
+// CHECKYAML-NEXT: 'contents': [
+// CHECKYAML-NEXT: {
+// CHECKYAML-NEXT: 'type': 'file',
+// CHECKYAML-NEXT: 'name': "module.map",
+// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/i/usr/include/module.map"
+// CHECKYAML-NEXT: },
+
+// CHECKYAML: 'type': 'directory'
+// CHECKYAML: 'name': "{{[^ ]*}}/i/usr",
+// CHECKYAML-NEXT: 'contents': [
+// CHECKYAML-NEXT: {
+// CHECKYAML-NEXT: 'type': 'file',
+// CHECKYAML-NEXT: 'name': "module.map",
+// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/i/usr/include/module.map"
+// CHECKYAML-NEXT: },
+
+// Test that by using the previous generated YAML file clang is able to find the
+// right files inside the overlay and map the virtual request for a path that
+// previously contained a symlink to work. To make sure of this, wipe out the
+// %/t/i directory containing the symlink component.
+
+// RUN: rm -rf %/t/i
+// RUN: unset FORCE_CLANG_DIAGNOSTICS_CRASH
+// RUN: %clang -E %s -I %/t/i -isysroot %/t/sysroot/ \
+// RUN: -ivfsoverlay %t/crash-vfs-*.cache/vfs/vfs.yaml -fmodules \
+// RUN: -fmodules-cache-path=%t/m/ 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECKOVERLAY
+
+// CHECKOVERLAY: @import cstd.stdio; /* clang -E: implicit import for "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/i/usr/include/stdio.h"
Added: cfe/trunk/test/Modules/crash-vfs-path-traversal.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/crash-vfs-path-traversal.m?rev=263617&view=auto
==============================================================================
--- cfe/trunk/test/Modules/crash-vfs-path-traversal.m (added)
+++ cfe/trunk/test/Modules/crash-vfs-path-traversal.m Tue Mar 15 23:39:38 2016
@@ -0,0 +1,60 @@
+// REQUIRES: crash-recovery, shell, non-ms-sdk, non-ps4-sdk
+
+// FIXME: Canonicalizing paths to remove relative traversal components
+// currenty fails a unittest on windows and is disable by default.
+// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it?
+// XFAIL: mingw32
+
+// RUN: rm -rf %t
+// RUN: mkdir -p %t/i %t/m %t
+
+// RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \
+// RUN: %clang -fsyntax-only %s -I %S/Inputs/System -isysroot %/t/i/ \
+// RUN: -fmodules -fmodules-cache-path=%t/m/ 2>&1 | FileCheck %s
+
+// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crash-vfs-*.m
+// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crash-vfs-*.sh
+// RUN: FileCheck --check-prefix=CHECKYAML %s -input-file \
+// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml
+// RUN: find %t/crash-vfs-*.cache/vfs | \
+// RUN: grep "Inputs/System/usr/include/stdio.h" | count 1
+
+#include "usr/././//////include/../include/./././../include/stdio.h"
+
+// CHECK: Preprocessed source(s) and associated run script(s) are located at:
+// CHECK-NEXT: note: diagnostic msg: {{.*}}.m
+// CHECK-NEXT: note: diagnostic msg: {{.*}}.cache
+
+// CHECKSRC: @import cstd.stdio;
+
+// CHECKSH: # Crash reproducer
+// CHECKSH-NEXT: # Driver args: "-fsyntax-only"
+// CHECKSH-NEXT: # Original command: {{.*$}}
+// CHECKSH-NEXT: "-cc1"
+// CHECKSH: "-isysroot" "{{[^"]*}}/i/"
+// CHECKSH-NOT: "-fmodules-cache-path="
+// CHECKSH: "crash-vfs-{{[^ ]*}}.m"
+// CHECKSH: "-ivfsoverlay" "crash-vfs-{{[^ ]*}}.cache/vfs/vfs.yaml"
+
+// CHECKYAML: 'type': 'directory'
+// CHECKYAML: 'name': "{{[^ ]*}}/Inputs/System/usr/include",
+// CHECKYAML-NEXT: 'contents': [
+// CHECKYAML-NEXT: {
+// CHECKYAML-NEXT: 'type': 'file',
+// CHECKYAML-NEXT: 'name': "module.map",
+// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}/Inputs/System/usr/include/module.map"
+// CHECKYAML-NEXT: },
+
+// Replace the paths in the YAML files with relative ".." traversals
+// and fed into clang to test whether we're correctly representing them
+// in the VFS overlay.
+
+// RUN: sed -e "s at usr/include at usr/include/../include at g" \
+// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml > %t/vfs.yaml
+// RUN: unset FORCE_CLANG_DIAGNOSTICS_CRASH
+// RUN: %clang -E %s -I %S/Inputs/System -isysroot %/t/i/ \
+// RUN: -ivfsoverlay %t/vfs.yaml -fmodules \
+// RUN: -fmodules-cache-path=%t/m/ 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECKOVERLAY
+
+// CHECKOVERLAY: @import cstd.stdio; /* clang -E: implicit import for "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/usr/include/stdio.h"
More information about the cfe-commits
mailing list