r263686 - Reapply [2]: [VFS] Add support for handling path traversals
Sean Silva via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 6 18:59:39 PDT 2016
On Wed, Mar 16, 2016 at 7:20 PM, Bruno Cardoso Lopes via cfe-commits <
cfe-commits at lists.llvm.org> wrote:
> Author: bruno
> Date: Wed Mar 16 21:20:43 2016
> New Revision: 263686
>
> URL: http://llvm.org/viewvc/llvm-project?rev=263686&view=rev
> Log:
> Reapply [2]: [VFS] Add support for handling path traversals
>
> This was applied twice r261551 and 263617 and later reverted because:
>
> (1) Windows bot failing on unittests. Change the current behavior to do
> not handle path traversals on windows.
>
> (2) Windows bot failed to include llvm/Config/config.h in order to use
> HAVE_REALPATH. Use LLVM_ON_UNIX instead, as done in
> lib/Basic/FileManager.cpp.
>
> Handle ".", ".." and "./" with trailing slashes while collecting files
> to be dumped into the vfs overlay directory.
>
> Include the support for symlinks into components. Given the path:
>
> /install-dir/bin/../lib/clang/3.8.0/include/altivec.h, if "bin"
> component is a symlink, it's not safe to use `path::remove_dots` here,
> and `realpath` is used to get the right answer. Since `realpath`
> is expensive, we only do it at collecting time (which only happens
> during the crash reproducer) and cache the base directory for fast lookups.
>
> Overall, this makes the input to the VFS YAML file to be canonicalized
> to never contain traversal components.
>
> Differential Revision: http://reviews.llvm.org/D17104
>
> rdar://problem/24499339
>
> Added:
> cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m
> cfe/trunk/test/Modules/crash-vfs-path-traversal.m
> Modified:
> cfe/trunk/lib/Basic/VirtualFileSystem.cpp
> cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp
>
> Modified: cfe/trunk/lib/Basic/VirtualFileSystem.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/VirtualFileSystem.cpp?rev=263686&r1=263685&r2=263686&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Basic/VirtualFileSystem.cpp (original)
> +++ cfe/trunk/lib/Basic/VirtualFileSystem.cpp Wed Mar 16 21:20:43 2016
> @@ -112,6 +112,20 @@ bool FileSystem::exists(const Twine &Pat
> return Status && Status->exists();
> }
>
> +#ifndef NDEBUG
> +static bool isTraversalComponent(StringRef Component) {
> + return Component.equals("..") || Component.equals(".");
> +}
> +
> +static bool pathHasTraversal(StringRef Path) {
> + using namespace llvm::sys;
> + for (StringRef Comp : llvm::make_range(path::begin(Path),
> path::end(Path)))
> + if (isTraversalComponent(Comp))
> + return true;
> + return false;
> +}
> +#endif
> +
>
> //===-----------------------------------------------------------------------===/
> // RealFileSystem implementation
>
> //===-----------------------------------------------------------------------===/
> @@ -819,6 +833,16 @@ class RedirectingFileSystem : public vfs
> bool UseExternalNames;
> /// @}
>
> + /// Virtual file paths and external files could be canonicalized
> without "..",
> + /// "." and "./" in their paths. FIXME: some unittests currently fail on
> + /// win32 when using remove_dots and remove_leading_dotslash on paths.
> + bool UseCanonicalizedPaths =
> +#ifdef LLVM_ON_WIN32
> + false;
> +#else
> + true;
> +#endif
> +
> friend class RedirectingFileSystemParser;
>
> private:
> @@ -954,7 +978,7 @@ class RedirectingFileSystemParser {
> return true;
> }
>
> - std::unique_ptr<Entry> parseEntry(yaml::Node *N) {
> + std::unique_ptr<Entry> parseEntry(yaml::Node *N, RedirectingFileSystem
> *FS) {
> yaml::MappingNode *M = dyn_cast<yaml::MappingNode>(N);
> if (!M) {
> error(N, "expected mapping node for file or directory entry");
> @@ -994,7 +1018,17 @@ class RedirectingFileSystemParser {
> if (Key == "name") {
> if (!parseScalarString(I->getValue(), Value, Buffer))
> return nullptr;
> - Name = Value;
> +
> + if (FS->UseCanonicalizedPaths) {
> + SmallString<256> Path(Value);
> + // Guarantee that old YAML files containing paths with ".." and
> "."
> + // are properly canonicalized before read into the VFS.
> + Path = sys::path::remove_leading_dotslash(Path);
> + sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
> + Name = Path.str();
> + } else {
> + Name = Value;
> + }
> } else if (Key == "type") {
> if (!parseScalarString(I->getValue(), Value, Buffer))
> return nullptr;
> @@ -1024,7 +1058,7 @@ class RedirectingFileSystemParser {
> for (yaml::SequenceNode::iterator I = Contents->begin(),
> E = Contents->end();
> I != E; ++I) {
> - if (std::unique_ptr<Entry> E = parseEntry(&*I))
> + if (std::unique_ptr<Entry> E = parseEntry(&*I, FS))
> EntryArrayContents.push_back(std::move(E));
> else
> return nullptr;
> @@ -1038,7 +1072,16 @@ class RedirectingFileSystemParser {
> HasContents = true;
> if (!parseScalarString(I->getValue(), Value, Buffer))
> return nullptr;
> - ExternalContentsPath = Value;
> + if (FS->UseCanonicalizedPaths) {
> + SmallString<256> Path(Value);
> + // Guarantee that old YAML files containing paths with ".." and
> "."
> + // are properly canonicalized before read into the VFS.
> + Path = sys::path::remove_leading_dotslash(Path);
> + sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
> + ExternalContentsPath = Path.str();
> + } else {
> + ExternalContentsPath = Value;
> + }
> } else if (Key == "use-external-name") {
> bool Val;
> if (!parseScalarBool(I->getValue(), Val))
> @@ -1149,7 +1192,7 @@ public:
>
> for (yaml::SequenceNode::iterator I = Roots->begin(), E =
> Roots->end();
> I != E; ++I) {
> - if (std::unique_ptr<Entry> E = parseEntry(&*I))
> + if (std::unique_ptr<Entry> E = parseEntry(&*I, FS))
> FS->Roots.push_back(std::move(E));
> else
> return false;
> @@ -1228,6 +1271,14 @@ ErrorOr<Entry *> RedirectingFileSystem::
> if (std::error_code EC = makeAbsolute(Path))
> return EC;
>
> + // Canonicalize path by removing ".", "..", "./", etc components. This
> is
> + // a VFS request, do bot bother about symlinks in the path components
> + // but canonicalize in order to perform the correct entry search.
> + if (UseCanonicalizedPaths) {
> + Path = sys::path::remove_leading_dotslash(Path);
> + sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
> + }
> +
> if (Path.empty())
> return make_error_code(llvm::errc::invalid_argument);
>
> @@ -1244,10 +1295,17 @@ ErrorOr<Entry *> RedirectingFileSystem::
> ErrorOr<Entry *>
> RedirectingFileSystem::lookupPath(sys::path::const_iterator Start,
> sys::path::const_iterator End, Entry
> *From) {
> +#ifndef LLVM_ON_WIN32
> + assert(!isTraversalComponent(*Start) &&
> + !isTraversalComponent(From->getName()) &&
> + "Paths should not contain traversal components");
> +#else
> + // FIXME: this is here to support windows, remove it once canonicalized
> + // paths become globally default.
> if (Start->equals("."))
> ++Start;
> +#endif
>
> - // FIXME: handle ..
> if (CaseSensitive ? !Start->equals(From->getName())
> : !Start->equals_lower(From->getName()))
> // failure to match
> @@ -1366,16 +1424,6 @@ UniqueID vfs::getNextVirtualUniqueID() {
> return UniqueID(std::numeric_limits<uint64_t>::max(), ID);
> }
>
> -#ifndef NDEBUG
> -static bool pathHasTraversal(StringRef Path) {
> - using namespace llvm::sys;
> - for (StringRef Comp : llvm::make_range(path::begin(Path),
> path::end(Path)))
> - if (Comp == "." || Comp == "..")
> - return true;
> - return false;
> -}
> -#endif
> -
> void YAMLVFSWriter::addFileMapping(StringRef VirtualPath, StringRef
> RealPath) {
> assert(sys::path::is_absolute(VirtualPath) && "virtual path not
> absolute");
> assert(sys::path::is_absolute(RealPath) && "real path not absolute");
>
> Modified: cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp?rev=263686&r1=263685&r2=263686&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp (original)
> +++ cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp Wed Mar 16
> 21:20:43 2016
> @@ -13,8 +13,9 @@
>
> #include "clang/Frontend/Utils.h"
> #include "clang/Serialization/ASTReader.h"
> -#include "llvm/ADT/StringSet.h"
> +#include "llvm/ADT/StringMap.h"
> #include "llvm/ADT/iterator_range.h"
> +#include "llvm/Config/config.h"
> #include "llvm/Support/FileSystem.h"
> #include "llvm/Support/Path.h"
> #include "llvm/Support/raw_ostream.h"
> @@ -25,7 +26,9 @@ namespace {
> /// Private implementation for ModuleDependencyCollector
> class ModuleDependencyListener : public ASTReaderListener {
> ModuleDependencyCollector &Collector;
> + llvm::StringMap<std::string> SymLinkMap;
>
> + bool getRealPath(StringRef SrcPath, SmallVectorImpl<char> &Result);
> std::error_code copyToRoot(StringRef Src);
> public:
> ModuleDependencyListener(ModuleDependencyCollector &Collector)
> @@ -57,6 +60,48 @@ void ModuleDependencyCollector::writeFil
> VFSWriter.write(OS);
> }
>
> +// TODO: move this to Support/Path.h and check for HAVE_REALPATH?
> +static bool real_path(StringRef SrcPath, SmallVectorImpl<char> &RealPath)
> {
> +#ifdef LLVM_ON_UNIX
> + char CanonicalPath[PATH_MAX];
> +
> + // TODO: emit a warning in case this fails...?
> + if (!realpath(SrcPath.str().c_str(), CanonicalPath))
> + return false;
> +
> + SmallString<256> RPath(CanonicalPath);
> + RealPath.swap(RPath);
> + return true;
> +#else
> + // FIXME: Add support for systems without realpath.
> + return false;
> +#endif
> +}
>
We already have a similar call to realpath in lib/Basic/FileManager.cpp
(ugly #ifdef and all)
Could you avoid duplicating it?
-- Sean Silva
> +
> +bool ModuleDependencyListener::getRealPath(StringRef SrcPath,
> + SmallVectorImpl<char> &Result)
> {
> + using namespace llvm::sys;
> + SmallString<256> RealPath;
> + StringRef FileName = path::filename(SrcPath);
> + std::string Dir = path::parent_path(SrcPath).str();
> + auto DirWithSymLink = SymLinkMap.find(Dir);
> +
> + // Use real_path to fix any symbolic link component present in a path.
> + // Computing the real path is expensive, cache the search through the
> + // parent path directory.
> + if (DirWithSymLink == SymLinkMap.end()) {
> + if (!real_path(Dir, RealPath))
> + return false;
> + SymLinkMap[Dir] = RealPath.str();
> + } else {
> + RealPath = DirWithSymLink->second;
> + }
> +
> + path::append(RealPath, FileName);
> + Result.swap(RealPath);
> + return true;
> +}
> +
> std::error_code ModuleDependencyListener::copyToRoot(StringRef Src) {
> using namespace llvm::sys;
>
> @@ -65,22 +110,42 @@ std::error_code ModuleDependencyListener
> fs::make_absolute(AbsoluteSrc);
> // Canonicalize to a native path to avoid mixed separator styles.
> path::native(AbsoluteSrc);
> - // TODO: We probably need to handle .. as well as . in order to have
> valid
> - // input to the YAMLVFSWriter.
> - path::remove_dots(AbsoluteSrc);
> + // Remove redundant leading "./" pieces and consecutive separators.
> + AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc);
> +
> + // Canonicalize path by removing "..", "." components.
> + SmallString<256> CanonicalPath = AbsoluteSrc;
> + path::remove_dots(CanonicalPath, /*remove_dot_dot=*/true);
> +
> + // If a ".." component is present after a symlink component,
> remove_dots may
> + // lead to the wrong real destination path. Let the source be
> canonicalized
> + // like that but make sure the destination uses the real path.
> + bool HasDotDotInPath =
> + std::count(path::begin(AbsoluteSrc), path::end(AbsoluteSrc), "..")
> > 0;
> + SmallString<256> RealPath;
> + bool HasRemovedSymlinkComponent = HasDotDotInPath &&
> + getRealPath(AbsoluteSrc, RealPath) &&
> + !StringRef(CanonicalPath).equals(RealPath);
>
> // Build the destination path.
> SmallString<256> Dest = Collector.getDest();
> - path::append(Dest, path::relative_path(AbsoluteSrc));
> + path::append(Dest, path::relative_path(HasRemovedSymlinkComponent ?
> RealPath
> + :
> CanonicalPath));
>
> // Copy the file into place.
> if (std::error_code EC = fs::create_directories(path::parent_path(Dest),
>
> /*IgnoreExisting=*/true))
> return EC;
> - if (std::error_code EC = fs::copy_file(AbsoluteSrc, Dest))
> + if (std::error_code EC = fs::copy_file(
> + HasRemovedSymlinkComponent ? RealPath : CanonicalPath, Dest))
> return EC;
> - // Use the absolute path under the root for the file mapping.
> - Collector.addFileMapping(AbsoluteSrc, Dest);
> +
> + // Use the canonical path under the root for the file mapping. Also
> create
> + // an additional entry for the real path.
> + Collector.addFileMapping(CanonicalPath, Dest);
> + if (HasRemovedSymlinkComponent)
> + Collector.addFileMapping(RealPath, Dest);
> +
> return std::error_code();
> }
>
>
> Added: cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m?rev=263686&view=auto
>
> ==============================================================================
> --- cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m (added)
> +++ cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m Wed Mar 16
> 21:20:43 2016
> @@ -0,0 +1,72 @@
> +// REQUIRES: crash-recovery, shell
> +
> +// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it?
> +// XFAIL: mingw32
> +
> +// Test that clang is capable of collecting the right header files in the
> +// crash reproducer if there's a symbolic link component in the path.
> +
> +// RUN: rm -rf %t
> +// RUN: mkdir -p %t/i %t/m %t %t/sysroot
> +// RUN: cp -a %S/Inputs/System/usr %t/i/
> +// RUN: ln -s include/tcl-private %t/i/usr/x
> +
> +// RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \
> +// RUN: %clang -fsyntax-only %s -I %/t/i -isysroot %/t/sysroot/ \
> +// RUN: -fmodules -fmodules-cache-path=%t/m/ 2>&1 | FileCheck %s
> +
> +// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crash-vfs-*.m
> +// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crash-vfs-*.sh
> +// RUN: FileCheck --check-prefix=CHECKYAML %s -input-file \
> +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml
> +// RUN: find %t/crash-vfs-*.cache/vfs | \
> +// RUN: grep "usr/include/stdio.h" | count 1
> +
> +#include "usr/x/../stdio.h"
> +
> +// CHECK: Preprocessed source(s) and associated run script(s) are located
> at:
> +// CHECK-NEXT: note: diagnostic msg: {{.*}}.m
> +// CHECK-NEXT: note: diagnostic msg: {{.*}}.cache
> +
> +// CHECKSRC: @import cstd.stdio;
> +
> +// CHECKSH: # Crash reproducer
> +// CHECKSH-NEXT: # Driver args: "-fsyntax-only"
> +// CHECKSH-NEXT: # Original command: {{.*$}}
> +// CHECKSH-NEXT: "-cc1"
> +// CHECKSH: "-isysroot" "{{[^"]*}}/sysroot/"
> +// CHECKSH-NOT: "-fmodules-cache-path="
> +// CHECKSH: "crash-vfs-{{[^ ]*}}.m"
> +// CHECKSH: "-ivfsoverlay" "crash-vfs-{{[^ ]*}}.cache/vfs/vfs.yaml"
> +
> +// CHECKYAML: 'type': 'directory'
> +// CHECKYAML: 'name': "{{[^ ]*}}/i/usr/include",
> +// CHECKYAML-NEXT: 'contents': [
> +// CHECKYAML-NEXT: {
> +// CHECKYAML-NEXT: 'type': 'file',
> +// CHECKYAML-NEXT: 'name': "module.map",
> +// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}.cache/vfs/{{[^
> ]*}}/i/usr/include/module.map"
> +// CHECKYAML-NEXT: },
> +
> +// CHECKYAML: 'type': 'directory'
> +// CHECKYAML: 'name': "{{[^ ]*}}/i/usr",
> +// CHECKYAML-NEXT: 'contents': [
> +// CHECKYAML-NEXT: {
> +// CHECKYAML-NEXT: 'type': 'file',
> +// CHECKYAML-NEXT: 'name': "module.map",
> +// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}.cache/vfs/{{[^
> ]*}}/i/usr/include/module.map"
> +// CHECKYAML-NEXT: },
> +
> +// Test that by using the previous generated YAML file clang is able to
> find the
> +// right files inside the overlay and map the virtual request for a path
> that
> +// previously contained a symlink to work. To make sure of this, wipe out
> the
> +// %/t/i directory containing the symlink component.
> +
> +// RUN: rm -rf %/t/i
> +// RUN: unset FORCE_CLANG_DIAGNOSTICS_CRASH
> +// RUN: %clang -E %s -I %/t/i -isysroot %/t/sysroot/ \
> +// RUN: -ivfsoverlay %t/crash-vfs-*.cache/vfs/vfs.yaml -fmodules \
> +// RUN: -fmodules-cache-path=%t/m/ 2>&1 \
> +// RUN: | FileCheck %s --check-prefix=CHECKOVERLAY
> +
> +// CHECKOVERLAY: @import cstd.stdio; /* clang -E: implicit import for
> "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/i/usr/include/stdio.h"
>
> Added: cfe/trunk/test/Modules/crash-vfs-path-traversal.m
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/crash-vfs-path-traversal.m?rev=263686&view=auto
>
> ==============================================================================
> --- cfe/trunk/test/Modules/crash-vfs-path-traversal.m (added)
> +++ cfe/trunk/test/Modules/crash-vfs-path-traversal.m Wed Mar 16 21:20:43
> 2016
> @@ -0,0 +1,60 @@
> +// REQUIRES: crash-recovery, shell, non-ms-sdk, non-ps4-sdk
> +
> +// FIXME: Canonicalizing paths to remove relative traversal components
> +// currenty fails a unittest on windows and is disable by default.
> +// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it?
> +// XFAIL: mingw32
> +
> +// RUN: rm -rf %t
> +// RUN: mkdir -p %t/i %t/m %t
> +
> +// RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \
> +// RUN: %clang -fsyntax-only %s -I %S/Inputs/System -isysroot %/t/i/ \
> +// RUN: -fmodules -fmodules-cache-path=%t/m/ 2>&1 | FileCheck %s
> +
> +// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crash-vfs-*.m
> +// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crash-vfs-*.sh
> +// RUN: FileCheck --check-prefix=CHECKYAML %s -input-file \
> +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml
> +// RUN: find %t/crash-vfs-*.cache/vfs | \
> +// RUN: grep "Inputs/System/usr/include/stdio.h" | count 1
> +
> +#include "usr/././//////include/../include/./././../include/stdio.h"
> +
> +// CHECK: Preprocessed source(s) and associated run script(s) are located
> at:
> +// CHECK-NEXT: note: diagnostic msg: {{.*}}.m
> +// CHECK-NEXT: note: diagnostic msg: {{.*}}.cache
> +
> +// CHECKSRC: @import cstd.stdio;
> +
> +// CHECKSH: # Crash reproducer
> +// CHECKSH-NEXT: # Driver args: "-fsyntax-only"
> +// CHECKSH-NEXT: # Original command: {{.*$}}
> +// CHECKSH-NEXT: "-cc1"
> +// CHECKSH: "-isysroot" "{{[^"]*}}/i/"
> +// CHECKSH-NOT: "-fmodules-cache-path="
> +// CHECKSH: "crash-vfs-{{[^ ]*}}.m"
> +// CHECKSH: "-ivfsoverlay" "crash-vfs-{{[^ ]*}}.cache/vfs/vfs.yaml"
> +
> +// CHECKYAML: 'type': 'directory'
> +// CHECKYAML: 'name': "{{[^ ]*}}/Inputs/System/usr/include",
> +// CHECKYAML-NEXT: 'contents': [
> +// CHECKYAML-NEXT: {
> +// CHECKYAML-NEXT: 'type': 'file',
> +// CHECKYAML-NEXT: 'name': "module.map",
> +// CHECKYAML-NEXT: 'external-contents': "{{[^
> ]*}}/Inputs/System/usr/include/module.map"
> +// CHECKYAML-NEXT: },
> +
> +// Replace the paths in the YAML files with relative ".." traversals
> +// and fed into clang to test whether we're correctly representing them
> +// in the VFS overlay.
> +
> +// RUN: sed -e "s at usr/include at usr/include/../include at g" \
> +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml > %t/vfs.yaml
> +// RUN: unset FORCE_CLANG_DIAGNOSTICS_CRASH
> +// RUN: %clang -E %s -I %S/Inputs/System -isysroot %/t/i/ \
> +// RUN: -ivfsoverlay %t/vfs.yaml -fmodules \
> +// RUN: -fmodules-cache-path=%t/m/ 2>&1 \
> +// RUN: | FileCheck %s --check-prefix=CHECKOVERLAY
> +
> +// CHECKOVERLAY: @import cstd.stdio; /* clang -E: implicit import for
> "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/usr/include/stdio.h"
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20160406/1faf54bb/attachment-0001.html>
More information about the cfe-commits
mailing list