Sure, I'll tackle this soon! On Wed, Apr 6, 2016 at 6:59 PM, Sean Silva <chisophu...@gmail.com> wrote:
> > > On Wed, Mar 16, 2016 at 7:20 PM, Bruno Cardoso Lopes via cfe-commits < > cfe-commits@lists.llvm.org> wrote: > >> Author: bruno >> Date: Wed Mar 16 21:20:43 2016 >> New Revision: 263686 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=263686&view=rev >> Log: >> Reapply [2]: [VFS] Add support for handling path traversals >> >> This was applied twice r261551 and 263617 and later reverted because: >> >> (1) Windows bot failing on unittests. Change the current behavior to do >> not handle path traversals on windows. >> >> (2) Windows bot failed to include llvm/Config/config.h in order to use >> HAVE_REALPATH. Use LLVM_ON_UNIX instead, as done in >> lib/Basic/FileManager.cpp. >> >> Handle ".", ".." and "./" with trailing slashes while collecting files >> to be dumped into the vfs overlay directory. >> >> Include the support for symlinks into components. Given the path: >> >> /install-dir/bin/../lib/clang/3.8.0/include/altivec.h, if "bin" >> component is a symlink, it's not safe to use `path::remove_dots` here, >> and `realpath` is used to get the right answer. Since `realpath` >> is expensive, we only do it at collecting time (which only happens >> during the crash reproducer) and cache the base directory for fast >> lookups. >> >> Overall, this makes the input to the VFS YAML file to be canonicalized >> to never contain traversal components. >> >> Differential Revision: http://reviews.llvm.org/D17104 >> >> rdar://problem/24499339 >> >> Added: >> cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m >> cfe/trunk/test/Modules/crash-vfs-path-traversal.m >> Modified: >> cfe/trunk/lib/Basic/VirtualFileSystem.cpp >> cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp >> >> Modified: cfe/trunk/lib/Basic/VirtualFileSystem.cpp >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/VirtualFileSystem.cpp?rev=263686&r1=263685&r2=263686&view=diff >> >> ============================================================================== >> --- cfe/trunk/lib/Basic/VirtualFileSystem.cpp (original) >> +++ cfe/trunk/lib/Basic/VirtualFileSystem.cpp Wed Mar 16 21:20:43 2016 >> @@ -112,6 +112,20 @@ bool FileSystem::exists(const Twine &Pat >> return Status && Status->exists(); >> } >> >> +#ifndef NDEBUG >> +static bool isTraversalComponent(StringRef Component) { >> + return Component.equals("..") || Component.equals("."); >> +} >> + >> +static bool pathHasTraversal(StringRef Path) { >> + using namespace llvm::sys; >> + for (StringRef Comp : llvm::make_range(path::begin(Path), >> path::end(Path))) >> + if (isTraversalComponent(Comp)) >> + return true; >> + return false; >> +} >> +#endif >> + >> >> >> //===-----------------------------------------------------------------------===/ >> // RealFileSystem implementation >> >> >> //===-----------------------------------------------------------------------===/ >> @@ -819,6 +833,16 @@ class RedirectingFileSystem : public vfs >> bool UseExternalNames; >> /// @} >> >> + /// Virtual file paths and external files could be canonicalized >> without "..", >> + /// "." and "./" in their paths. FIXME: some unittests currently fail >> on >> + /// win32 when using remove_dots and remove_leading_dotslash on paths. >> + bool UseCanonicalizedPaths = >> +#ifdef LLVM_ON_WIN32 >> + false; >> +#else >> + true; >> +#endif >> + >> friend class RedirectingFileSystemParser; >> >> private: >> @@ -954,7 +978,7 @@ class RedirectingFileSystemParser { >> return true; >> } >> >> - std::unique_ptr<Entry> parseEntry(yaml::Node *N) { >> + std::unique_ptr<Entry> parseEntry(yaml::Node *N, RedirectingFileSystem >> *FS) { >> yaml::MappingNode *M = dyn_cast<yaml::MappingNode>(N); >> if (!M) { >> error(N, "expected mapping node for file or directory entry"); >> @@ -994,7 +1018,17 @@ class RedirectingFileSystemParser { >> if (Key == "name") { >> if (!parseScalarString(I->getValue(), Value, Buffer)) >> return nullptr; >> - Name = Value; >> + >> + if (FS->UseCanonicalizedPaths) { >> + SmallString<256> Path(Value); >> + // Guarantee that old YAML files containing paths with ".." >> and "." >> + // are properly canonicalized before read into the VFS. >> + Path = sys::path::remove_leading_dotslash(Path); >> + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); >> + Name = Path.str(); >> + } else { >> + Name = Value; >> + } >> } else if (Key == "type") { >> if (!parseScalarString(I->getValue(), Value, Buffer)) >> return nullptr; >> @@ -1024,7 +1058,7 @@ class RedirectingFileSystemParser { >> for (yaml::SequenceNode::iterator I = Contents->begin(), >> E = Contents->end(); >> I != E; ++I) { >> - if (std::unique_ptr<Entry> E = parseEntry(&*I)) >> + if (std::unique_ptr<Entry> E = parseEntry(&*I, FS)) >> EntryArrayContents.push_back(std::move(E)); >> else >> return nullptr; >> @@ -1038,7 +1072,16 @@ class RedirectingFileSystemParser { >> HasContents = true; >> if (!parseScalarString(I->getValue(), Value, Buffer)) >> return nullptr; >> - ExternalContentsPath = Value; >> + if (FS->UseCanonicalizedPaths) { >> + SmallString<256> Path(Value); >> + // Guarantee that old YAML files containing paths with ".." >> and "." >> + // are properly canonicalized before read into the VFS. >> + Path = sys::path::remove_leading_dotslash(Path); >> + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); >> + ExternalContentsPath = Path.str(); >> + } else { >> + ExternalContentsPath = Value; >> + } >> } else if (Key == "use-external-name") { >> bool Val; >> if (!parseScalarBool(I->getValue(), Val)) >> @@ -1149,7 +1192,7 @@ public: >> >> for (yaml::SequenceNode::iterator I = Roots->begin(), E = >> Roots->end(); >> I != E; ++I) { >> - if (std::unique_ptr<Entry> E = parseEntry(&*I)) >> + if (std::unique_ptr<Entry> E = parseEntry(&*I, FS)) >> FS->Roots.push_back(std::move(E)); >> else >> return false; >> @@ -1228,6 +1271,14 @@ ErrorOr<Entry *> RedirectingFileSystem:: >> if (std::error_code EC = makeAbsolute(Path)) >> return EC; >> >> + // Canonicalize path by removing ".", "..", "./", etc components. This >> is >> + // a VFS request, do bot bother about symlinks in the path components >> + // but canonicalize in order to perform the correct entry search. >> + if (UseCanonicalizedPaths) { >> + Path = sys::path::remove_leading_dotslash(Path); >> + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); >> + } >> + >> if (Path.empty()) >> return make_error_code(llvm::errc::invalid_argument); >> >> @@ -1244,10 +1295,17 @@ ErrorOr<Entry *> RedirectingFileSystem:: >> ErrorOr<Entry *> >> RedirectingFileSystem::lookupPath(sys::path::const_iterator Start, >> sys::path::const_iterator End, Entry >> *From) { >> +#ifndef LLVM_ON_WIN32 >> + assert(!isTraversalComponent(*Start) && >> + !isTraversalComponent(From->getName()) && >> + "Paths should not contain traversal components"); >> +#else >> + // FIXME: this is here to support windows, remove it once canonicalized >> + // paths become globally default. >> if (Start->equals(".")) >> ++Start; >> +#endif >> >> - // FIXME: handle .. >> if (CaseSensitive ? !Start->equals(From->getName()) >> : !Start->equals_lower(From->getName())) >> // failure to match >> @@ -1366,16 +1424,6 @@ UniqueID vfs::getNextVirtualUniqueID() { >> return UniqueID(std::numeric_limits<uint64_t>::max(), ID); >> } >> >> -#ifndef NDEBUG >> -static bool pathHasTraversal(StringRef Path) { >> - using namespace llvm::sys; >> - for (StringRef Comp : llvm::make_range(path::begin(Path), >> path::end(Path))) >> - if (Comp == "." || Comp == "..") >> - return true; >> - return false; >> -} >> -#endif >> - >> void YAMLVFSWriter::addFileMapping(StringRef VirtualPath, StringRef >> RealPath) { >> assert(sys::path::is_absolute(VirtualPath) && "virtual path not >> absolute"); >> assert(sys::path::is_absolute(RealPath) && "real path not absolute"); >> >> Modified: cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp?rev=263686&r1=263685&r2=263686&view=diff >> >> ============================================================================== >> --- cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp (original) >> +++ cfe/trunk/lib/Frontend/ModuleDependencyCollector.cpp Wed Mar 16 >> 21:20:43 2016 >> @@ -13,8 +13,9 @@ >> >> #include "clang/Frontend/Utils.h" >> #include "clang/Serialization/ASTReader.h" >> -#include "llvm/ADT/StringSet.h" >> +#include "llvm/ADT/StringMap.h" >> #include "llvm/ADT/iterator_range.h" >> +#include "llvm/Config/config.h" >> #include "llvm/Support/FileSystem.h" >> #include "llvm/Support/Path.h" >> #include "llvm/Support/raw_ostream.h" >> @@ -25,7 +26,9 @@ namespace { >> /// Private implementation for ModuleDependencyCollector >> class ModuleDependencyListener : public ASTReaderListener { >> ModuleDependencyCollector &Collector; >> + llvm::StringMap<std::string> SymLinkMap; >> >> + bool getRealPath(StringRef SrcPath, SmallVectorImpl<char> &Result); >> std::error_code copyToRoot(StringRef Src); >> public: >> ModuleDependencyListener(ModuleDependencyCollector &Collector) >> @@ -57,6 +60,48 @@ void ModuleDependencyCollector::writeFil >> VFSWriter.write(OS); >> } >> >> +// TODO: move this to Support/Path.h and check for HAVE_REALPATH? >> +static bool real_path(StringRef SrcPath, SmallVectorImpl<char> >> &RealPath) { >> +#ifdef LLVM_ON_UNIX >> + char CanonicalPath[PATH_MAX]; >> + >> + // TODO: emit a warning in case this fails...? >> + if (!realpath(SrcPath.str().c_str(), CanonicalPath)) >> + return false; >> + >> + SmallString<256> RPath(CanonicalPath); >> + RealPath.swap(RPath); >> + return true; >> +#else >> + // FIXME: Add support for systems without realpath. >> + return false; >> +#endif >> +} >> > > We already have a similar call to realpath in lib/Basic/FileManager.cpp > (ugly #ifdef and all) > > Could you avoid duplicating it? > > -- Sean Silva > > >> + >> +bool ModuleDependencyListener::getRealPath(StringRef SrcPath, >> + SmallVectorImpl<char> >> &Result) { >> + using namespace llvm::sys; >> + SmallString<256> RealPath; >> + StringRef FileName = path::filename(SrcPath); >> + std::string Dir = path::parent_path(SrcPath).str(); >> + auto DirWithSymLink = SymLinkMap.find(Dir); >> + >> + // Use real_path to fix any symbolic link component present in a path. >> + // Computing the real path is expensive, cache the search through the >> + // parent path directory. >> + if (DirWithSymLink == SymLinkMap.end()) { >> + if (!real_path(Dir, RealPath)) >> + return false; >> + SymLinkMap[Dir] = RealPath.str(); >> + } else { >> + RealPath = DirWithSymLink->second; >> + } >> + >> + path::append(RealPath, FileName); >> + Result.swap(RealPath); >> + return true; >> +} >> + >> std::error_code ModuleDependencyListener::copyToRoot(StringRef Src) { >> using namespace llvm::sys; >> >> @@ -65,22 +110,42 @@ std::error_code ModuleDependencyListener >> fs::make_absolute(AbsoluteSrc); >> // Canonicalize to a native path to avoid mixed separator styles. >> path::native(AbsoluteSrc); >> - // TODO: We probably need to handle .. as well as . in order to have >> valid >> - // input to the YAMLVFSWriter. >> - path::remove_dots(AbsoluteSrc); >> + // Remove redundant leading "./" pieces and consecutive separators. >> + AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc); >> + >> + // Canonicalize path by removing "..", "." components. >> + SmallString<256> CanonicalPath = AbsoluteSrc; >> + path::remove_dots(CanonicalPath, /*remove_dot_dot=*/true); >> + >> + // If a ".." component is present after a symlink component, >> remove_dots may >> + // lead to the wrong real destination path. Let the source be >> canonicalized >> + // like that but make sure the destination uses the real path. >> + bool HasDotDotInPath = >> + std::count(path::begin(AbsoluteSrc), path::end(AbsoluteSrc), "..") >> > 0; >> + SmallString<256> RealPath; >> + bool HasRemovedSymlinkComponent = HasDotDotInPath && >> + getRealPath(AbsoluteSrc, RealPath) && >> + !StringRef(CanonicalPath).equals(RealPath); >> >> // Build the destination path. >> SmallString<256> Dest = Collector.getDest(); >> - path::append(Dest, path::relative_path(AbsoluteSrc)); >> + path::append(Dest, path::relative_path(HasRemovedSymlinkComponent ? >> RealPath >> + : >> CanonicalPath)); >> >> // Copy the file into place. >> if (std::error_code EC = >> fs::create_directories(path::parent_path(Dest), >> >> /*IgnoreExisting=*/true)) >> return EC; >> - if (std::error_code EC = fs::copy_file(AbsoluteSrc, Dest)) >> + if (std::error_code EC = fs::copy_file( >> + HasRemovedSymlinkComponent ? RealPath : CanonicalPath, Dest)) >> return EC; >> - // Use the absolute path under the root for the file mapping. >> - Collector.addFileMapping(AbsoluteSrc, Dest); >> + >> + // Use the canonical path under the root for the file mapping. Also >> create >> + // an additional entry for the real path. >> + Collector.addFileMapping(CanonicalPath, Dest); >> + if (HasRemovedSymlinkComponent) >> + Collector.addFileMapping(RealPath, Dest); >> + >> return std::error_code(); >> } >> >> >> Added: cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m?rev=263686&view=auto >> >> ============================================================================== >> --- cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m (added) >> +++ cfe/trunk/test/Modules/crash-vfs-path-symlink-component.m Wed Mar 16 >> 21:20:43 2016 >> @@ -0,0 +1,72 @@ >> +// REQUIRES: crash-recovery, shell >> + >> +// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it? >> +// XFAIL: mingw32 >> + >> +// Test that clang is capable of collecting the right header files in the >> +// crash reproducer if there's a symbolic link component in the path. >> + >> +// RUN: rm -rf %t >> +// RUN: mkdir -p %t/i %t/m %t %t/sysroot >> +// RUN: cp -a %S/Inputs/System/usr %t/i/ >> +// RUN: ln -s include/tcl-private %t/i/usr/x >> + >> +// RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \ >> +// RUN: %clang -fsyntax-only %s -I %/t/i -isysroot %/t/sysroot/ \ >> +// RUN: -fmodules -fmodules-cache-path=%t/m/ 2>&1 | FileCheck %s >> + >> +// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crash-vfs-*.m >> +// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crash-vfs-*.sh >> +// RUN: FileCheck --check-prefix=CHECKYAML %s -input-file \ >> +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml >> +// RUN: find %t/crash-vfs-*.cache/vfs | \ >> +// RUN: grep "usr/include/stdio.h" | count 1 >> + >> +#include "usr/x/../stdio.h" >> + >> +// CHECK: Preprocessed source(s) and associated run script(s) are >> located at: >> +// CHECK-NEXT: note: diagnostic msg: {{.*}}.m >> +// CHECK-NEXT: note: diagnostic msg: {{.*}}.cache >> + >> +// CHECKSRC: @import cstd.stdio; >> + >> +// CHECKSH: # Crash reproducer >> +// CHECKSH-NEXT: # Driver args: "-fsyntax-only" >> +// CHECKSH-NEXT: # Original command: {{.*$}} >> +// CHECKSH-NEXT: "-cc1" >> +// CHECKSH: "-isysroot" "{{[^"]*}}/sysroot/" >> +// CHECKSH-NOT: "-fmodules-cache-path=" >> +// CHECKSH: "crash-vfs-{{[^ ]*}}.m" >> +// CHECKSH: "-ivfsoverlay" "crash-vfs-{{[^ ]*}}.cache/vfs/vfs.yaml" >> + >> +// CHECKYAML: 'type': 'directory' >> +// CHECKYAML: 'name': "{{[^ ]*}}/i/usr/include", >> +// CHECKYAML-NEXT: 'contents': [ >> +// CHECKYAML-NEXT: { >> +// CHECKYAML-NEXT: 'type': 'file', >> +// CHECKYAML-NEXT: 'name': "module.map", >> +// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}.cache/vfs/{{[^ >> ]*}}/i/usr/include/module.map" >> +// CHECKYAML-NEXT: }, >> + >> +// CHECKYAML: 'type': 'directory' >> +// CHECKYAML: 'name': "{{[^ ]*}}/i/usr", >> +// CHECKYAML-NEXT: 'contents': [ >> +// CHECKYAML-NEXT: { >> +// CHECKYAML-NEXT: 'type': 'file', >> +// CHECKYAML-NEXT: 'name': "module.map", >> +// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}.cache/vfs/{{[^ >> ]*}}/i/usr/include/module.map" >> +// CHECKYAML-NEXT: }, >> + >> +// Test that by using the previous generated YAML file clang is able to >> find the >> +// right files inside the overlay and map the virtual request for a path >> that >> +// previously contained a symlink to work. To make sure of this, wipe >> out the >> +// %/t/i directory containing the symlink component. >> + >> +// RUN: rm -rf %/t/i >> +// RUN: unset FORCE_CLANG_DIAGNOSTICS_CRASH >> +// RUN: %clang -E %s -I %/t/i -isysroot %/t/sysroot/ \ >> +// RUN: -ivfsoverlay %t/crash-vfs-*.cache/vfs/vfs.yaml -fmodules \ >> +// RUN: -fmodules-cache-path=%t/m/ 2>&1 \ >> +// RUN: | FileCheck %s --check-prefix=CHECKOVERLAY >> + >> +// CHECKOVERLAY: @import cstd.stdio; /* clang -E: implicit import for >> "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/i/usr/include/stdio.h" >> >> Added: cfe/trunk/test/Modules/crash-vfs-path-traversal.m >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/crash-vfs-path-traversal.m?rev=263686&view=auto >> >> ============================================================================== >> --- cfe/trunk/test/Modules/crash-vfs-path-traversal.m (added) >> +++ cfe/trunk/test/Modules/crash-vfs-path-traversal.m Wed Mar 16 21:20:43 >> 2016 >> @@ -0,0 +1,60 @@ >> +// REQUIRES: crash-recovery, shell, non-ms-sdk, non-ps4-sdk >> + >> +// FIXME: Canonicalizing paths to remove relative traversal components >> +// currenty fails a unittest on windows and is disable by default. >> +// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it? >> +// XFAIL: mingw32 >> + >> +// RUN: rm -rf %t >> +// RUN: mkdir -p %t/i %t/m %t >> + >> +// RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \ >> +// RUN: %clang -fsyntax-only %s -I %S/Inputs/System -isysroot %/t/i/ \ >> +// RUN: -fmodules -fmodules-cache-path=%t/m/ 2>&1 | FileCheck %s >> + >> +// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crash-vfs-*.m >> +// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crash-vfs-*.sh >> +// RUN: FileCheck --check-prefix=CHECKYAML %s -input-file \ >> +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml >> +// RUN: find %t/crash-vfs-*.cache/vfs | \ >> +// RUN: grep "Inputs/System/usr/include/stdio.h" | count 1 >> + >> +#include "usr/././//////include/../include/./././../include/stdio.h" >> + >> +// CHECK: Preprocessed source(s) and associated run script(s) are >> located at: >> +// CHECK-NEXT: note: diagnostic msg: {{.*}}.m >> +// CHECK-NEXT: note: diagnostic msg: {{.*}}.cache >> + >> +// CHECKSRC: @import cstd.stdio; >> + >> +// CHECKSH: # Crash reproducer >> +// CHECKSH-NEXT: # Driver args: "-fsyntax-only" >> +// CHECKSH-NEXT: # Original command: {{.*$}} >> +// CHECKSH-NEXT: "-cc1" >> +// CHECKSH: "-isysroot" "{{[^"]*}}/i/" >> +// CHECKSH-NOT: "-fmodules-cache-path=" >> +// CHECKSH: "crash-vfs-{{[^ ]*}}.m" >> +// CHECKSH: "-ivfsoverlay" "crash-vfs-{{[^ ]*}}.cache/vfs/vfs.yaml" >> + >> +// CHECKYAML: 'type': 'directory' >> +// CHECKYAML: 'name': "{{[^ ]*}}/Inputs/System/usr/include", >> +// CHECKYAML-NEXT: 'contents': [ >> +// CHECKYAML-NEXT: { >> +// CHECKYAML-NEXT: 'type': 'file', >> +// CHECKYAML-NEXT: 'name': "module.map", >> +// CHECKYAML-NEXT: 'external-contents': "{{[^ >> ]*}}/Inputs/System/usr/include/module.map" >> +// CHECKYAML-NEXT: }, >> + >> +// Replace the paths in the YAML files with relative ".." traversals >> +// and fed into clang to test whether we're correctly representing them >> +// in the VFS overlay. >> + >> +// RUN: sed -e "s@usr/include@usr/include/../include@g" \ >> +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml > %t/vfs.yaml >> +// RUN: unset FORCE_CLANG_DIAGNOSTICS_CRASH >> +// RUN: %clang -E %s -I %S/Inputs/System -isysroot %/t/i/ \ >> +// RUN: -ivfsoverlay %t/vfs.yaml -fmodules \ >> +// RUN: -fmodules-cache-path=%t/m/ 2>&1 \ >> +// RUN: | FileCheck %s --check-prefix=CHECKOVERLAY >> + >> +// CHECKOVERLAY: @import cstd.stdio; /* clang -E: implicit import for >> "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/usr/include/stdio.h" >> >> >> _______________________________________________ >> cfe-commits mailing list >> cfe-commits@lists.llvm.org >> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits >> > > -- Bruno Cardoso Lopes http://www.brunocardoso.cc
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits