Author: Saleem Abdulrasool Date: 2023-08-01T11:00:27-07:00 New Revision: 05d613ea931b6de1b46dfe04b8e55285359047f4
URL: https://github.com/llvm/llvm-project/commit/05d613ea931b6de1b46dfe04b8e55285359047f4 DIFF: https://github.com/llvm/llvm-project/commit/05d613ea931b6de1b46dfe04b8e55285359047f4.diff LOG: [lit][clang] Avoid realpath on Windows due to MAX_PATH limitations Running lit tests on Windows can fail because its use of `os.path.realpath` expands substitute drives, which are used to keep paths short and avoid hitting MAX_PATH limitations. Changes lit logic to: Use `os.path.abspath` on Windows, where `MAX_PATH` is a concern that we can work around using substitute drives, which `os.path.realpath` would resolve. Use `os.path.realpath` on Unix, where the current directory always has symlinks resolved, so it is impossible to preserve symlinks in the presence of relative paths, and so we must make sure that all code paths use real paths. Also updates clang's `FileManager::getCanonicalName` and `ExtractAPI` code to avoid resolving substitute drives (i.e. resolving to a path under a different root). How tested: built with `-DLLVM_ENABLE_PROJECTS=clang` and built `check-all` on both Windows Differential Revision: https://reviews.llvm.org/D154130 Reviewed By: @benlangmuir Patch by Tristan Labelle <tristan@thebrowser.company>! Added: Modified: clang/include/clang/Basic/FileManager.h clang/lib/Basic/FileManager.cpp clang/lib/ExtractAPI/ExtractAPIConsumer.cpp clang/test/Lexer/case-insensitive-include-win.c llvm/cmake/modules/AddLLVM.cmake llvm/docs/CommandGuide/lit.rst llvm/docs/TestingGuide.rst llvm/utils/lit/lit/TestRunner.py llvm/utils/lit/lit/builtin_commands/diff.py llvm/utils/lit/lit/discovery.py llvm/utils/lit/lit/util.py llvm/utils/lit/tests/Inputs/config-map-discovery/driver.py llvm/utils/lit/tests/Inputs/config-map-discovery/lit.alt.cfg llvm/utils/lit/tests/Inputs/use-llvm-tool-required/lit.cfg llvm/utils/lit/tests/Inputs/use-llvm-tool/lit.cfg llvm/utils/llvm-lit/llvm-lit.in Removed: ################################################################################ diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 502b69c3b41baa..58ff42ee5c2416 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -329,6 +329,13 @@ class FileManager : public RefCountedBase<FileManager> { /// required, which is (almost) never. StringRef getCanonicalName(const FileEntry *File); +private: + /// Retrieve the canonical name for a given file or directory. + /// + /// The first param is a key in the CanonicalNames array. + StringRef getCanonicalName(const void *Entry, StringRef Name); + +public: void PrintStats() const; }; diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index f92c1aeb211241..c3eec80caaf301 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -632,33 +632,48 @@ void FileManager::GetUniqueIDMapping( } StringRef FileManager::getCanonicalName(DirectoryEntryRef Dir) { - auto Known = CanonicalNames.find(Dir); - if (Known != CanonicalNames.end()) - return Known->second; - - StringRef CanonicalName(Dir.getName()); - - SmallString<4096> CanonicalNameBuf; - if (!FS->getRealPath(Dir.getName(), CanonicalNameBuf)) - CanonicalName = CanonicalNameBuf.str().copy(CanonicalNameStorage); - - CanonicalNames.insert({Dir, CanonicalName}); - return CanonicalName; + return getCanonicalName(Dir, Dir.getName()); } StringRef FileManager::getCanonicalName(const FileEntry *File) { - llvm::DenseMap<const void *, llvm::StringRef>::iterator Known - = CanonicalNames.find(File); + return getCanonicalName(File, File->getName()); +} + +StringRef FileManager::getCanonicalName(const void *Entry, StringRef Name) { + llvm::DenseMap<const void *, llvm::StringRef>::iterator Known = + CanonicalNames.find(Entry); if (Known != CanonicalNames.end()) return Known->second; - StringRef CanonicalName(File->getName()); - - SmallString<4096> CanonicalNameBuf; - if (!FS->getRealPath(File->getName(), CanonicalNameBuf)) - CanonicalName = CanonicalNameBuf.str().copy(CanonicalNameStorage); + // Name comes from FileEntry/DirectoryEntry::getName(), so it is safe to + // store it in the DenseMap below. + StringRef CanonicalName(Name); + + SmallString<256> AbsPathBuf; + SmallString<256> RealPathBuf; + if (!FS->getRealPath(Name, RealPathBuf)) { + if (is_style_windows(llvm::sys::path::Style::native)) { + // For Windows paths, only use the real path if it doesn't resolve + // a substitute drive, as those are used to avoid MAX_PATH issues. + AbsPathBuf = Name; + if (!FS->makeAbsolute(AbsPathBuf)) { + if (llvm::sys::path::root_name(RealPathBuf) == + llvm::sys::path::root_name(AbsPathBuf)) { + CanonicalName = RealPathBuf.str().copy(CanonicalNameStorage); + } else { + // Fallback to using the absolute path. + // Simplifying /../ is semantically valid on Windows even in the + // presence of symbolic links. + llvm::sys::path::remove_dots(AbsPathBuf, /*remove_dot_dot=*/true); + CanonicalName = AbsPathBuf.str().copy(CanonicalNameStorage); + } + } + } else { + CanonicalName = RealPathBuf.str().copy(CanonicalNameStorage); + } + } - CanonicalNames.insert({File, CanonicalName}); + CanonicalNames.insert({Entry, CanonicalName}); return CanonicalName; } diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp index eb533a934367f4..50c223680b392d 100644 --- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp +++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp @@ -187,9 +187,7 @@ struct LocationFileChecker { if (ExternalFileEntries.count(File)) return false; - StringRef FileName = File->tryGetRealPathName().empty() - ? File->getName() - : File->tryGetRealPathName(); + StringRef FileName = SM.getFileManager().getCanonicalName(File); // Try to reduce the include name the same way we tried to include it. bool IsQuoted = false; diff --git a/clang/test/Lexer/case-insensitive-include-win.c b/clang/test/Lexer/case-insensitive-include-win.c index 6a17d0b552448a..ed722feb3d9942 100644 --- a/clang/test/Lexer/case-insensitive-include-win.c +++ b/clang/test/Lexer/case-insensitive-include-win.c @@ -2,9 +2,15 @@ // This file should only include code that really needs a Windows host OS to // run. +// Note: We must use the real path here, because the logic to detect case +// mismatches must resolve the real path to figure out the original casing. +// If we use %t and we are on a substitute drive S: mapping to C:\subst, +// then we will compare "S:\test.dir\FOO.h" to "C:\subst\test.dir\foo.h" +// and avoid emitting the diagnostic because the structure is diff erent. + // REQUIRES: system-windows // RUN: mkdir -p %t.dir // RUN: touch %t.dir/foo.h -// RUN: not %clang_cl /FI\\?\%t.dir\FOO.h /WX -fsyntax-only %s 2>&1 | FileCheck %s +// RUN: not %clang_cl /FI \\?\%{t:real}.dir\FOO.h /WX -fsyntax-only %s 2>&1 | FileCheck %s // CHECK: non-portable path to file '"\\?\ diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 230620c37027a1..a9aed212e5678f 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -1711,10 +1711,15 @@ endfunction() # use it and can't be in a lit module. Use with make_paths_relative(). string(CONCAT LLVM_LIT_PATH_FUNCTION "# Allow generated file to be relocatable.\n" - "from pathlib import Path\n" + "import os\n" + "import platform\n" "def path(p):\n" " if not p: return ''\n" - " return str((Path(__file__).parent / p).resolve())\n" + " # Follows lit.util.abs_path_preserve_drive, which cannot be imported here.\n" + " if platform.system() == 'Windows':\n" + " return os.path.abspath(os.path.join(os.path.dirname(__file__), p))\n" + " else:\n" + " return os.path.realpath(os.path.join(os.path.dirname(__file__), p))\n" ) # This function provides an automatic way to 'configure'-like generate a file diff --git a/llvm/docs/CommandGuide/lit.rst b/llvm/docs/CommandGuide/lit.rst index e82cfeb0695c1f..28319660d69c7c 100644 --- a/llvm/docs/CommandGuide/lit.rst +++ b/llvm/docs/CommandGuide/lit.rst @@ -541,6 +541,16 @@ TestRunner.py: %/p %p but ``\`` is replaced by ``/`` %/t %t but ``\`` is replaced by ``/`` %/T %T but ``\`` is replaced by ``/`` + %{s:real} %s after expanding all symbolic links and substitute drives + %{S:real} %S after expanding all symbolic links and substitute drives + %{p:real} %p after expanding all symbolic links and substitute drives + %{t:real} %t after expanding all symbolic links and substitute drives + %{T:real} %T after expanding all symbolic links and substitute drives + %{/s:real} %/s after expanding all symbolic links and substitute drives + %{/S:real} %/S after expanding all symbolic links and substitute drives + %{/p:real} %/p after expanding all symbolic links and substitute drives + %{/t:real} %/t after expanding all symbolic links and substitute drives + %{/T:real} %/T after expanding all symbolic links and substitute drives %{/s:regex_replacement} %/s but escaped for use in the replacement of a ``s@@@`` command in sed %{/S:regex_replacement} %/S but escaped for use in the replacement of a ``s@@@`` command in sed %{/p:regex_replacement} %/p but escaped for use in the replacement of a ``s@@@`` command in sed diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst index 56e923affafe44..a692e301fa2c41 100644 --- a/llvm/docs/TestingGuide.rst +++ b/llvm/docs/TestingGuide.rst @@ -671,7 +671,7 @@ RUN lines: ``${fs-sep}`` Expands to the file system separator, i.e. ``/`` or ``\`` on Windows. -``%/s, %/S, %/t, %/T:`` +``%/s, %/S, %/t, %/T`` Act like the corresponding substitution above but replace any ``\`` character with a ``/``. This is useful to normalize path separators. @@ -680,7 +680,17 @@ RUN lines: Example: ``%/s: C:/Desktop Files/foo_test.s.tmp`` -``%:s, %:S, %:t, %:T:`` +``%{s:real}, %{S:real}, %{t:real}, %{T:real}`` +``%{/s:real}, %{/S:real}, %{/t:real}, %{/T:real}`` + + Act like the corresponding substitution, including with ``/``, but use + the real path by expanding all symbolic links and substitute drives. + + Example: ``%s: S:\foo_test.s.tmp`` + + Example: ``%{/s:real}: C:/SDrive/foo_test.s.tmp`` + +``%:s, %:S, %:t, %:T`` Act like the corresponding substitution above but remove colons at the beginning of Windows paths. This is useful to allow concatenation diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 5c639b76155eb1..ed06aaa54d60e2 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -74,7 +74,7 @@ def change_dir(self, newdir): if os.path.isabs(newdir): self.cwd = newdir else: - self.cwd = os.path.realpath(os.path.join(self.cwd, newdir)) + self.cwd = lit.util.abs_path_preserve_drive(os.path.join(self.cwd, newdir)) class TimeoutHelper(object): @@ -427,7 +427,7 @@ def executeBuiltinMkdir(cmd, cmd_shenv): dir = to_unicode(dir) if kIsWindows else to_bytes(dir) cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd) if not os.path.isabs(dir): - dir = os.path.realpath(os.path.join(cwd, dir)) + dir = lit.util.abs_path_preserve_drive(os.path.join(cwd, dir)) if parent: lit.util.mkdir_p(dir) else: @@ -473,7 +473,7 @@ def on_rm_error(func, path, exc_info): path = to_unicode(path) if kIsWindows else to_bytes(path) cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd) if not os.path.isabs(path): - path = os.path.realpath(os.path.join(cwd, path)) + path = lit.util.abs_path_preserve_drive(os.path.join(cwd, path)) if force and not os.path.exists(path): continue try: @@ -1243,18 +1243,10 @@ def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False): substitutions.extend(test.config.substitutions) tmpName = tmpBase + ".tmp" baseName = os.path.basename(tmpBase) - substitutions.extend( - [ - ("%s", sourcepath), - ("%S", sourcedir), - ("%p", sourcedir), - ("%{pathsep}", os.pathsep), - ("%t", tmpName), - ("%basename_t", baseName), - ("%T", tmpDir), - ] - ) + substitutions.append(("%{pathsep}", os.pathsep)) + substitutions.append(("%basename_t", baseName)) + substitutions.extend( [ ("%{fs-src-root}", pathlib.Path(sourcedir).anchor), @@ -1263,49 +1255,47 @@ def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False): ] ) - # "%/[STpst]" should be normalized. - substitutions.extend( - [ - ("%/s", sourcepath.replace("\\", "/")), - ("%/S", sourcedir.replace("\\", "/")), - ("%/p", sourcedir.replace("\\", "/")), - ("%/t", tmpBase.replace("\\", "/") + ".tmp"), - ("%/T", tmpDir.replace("\\", "/")), - ("%/et", tmpName.replace("\\", "\\\\\\\\\\\\\\\\")), - ] - ) + substitutions.append(("%/et", tmpName.replace("\\", "\\\\\\\\\\\\\\\\"))) - # "%{/[STpst]:regex_replacement}" should be normalized like "%/[STpst]" but we're - # also in a regex replacement context of a s@@@ regex. def regex_escape(s): s = s.replace("@", r"\@") s = s.replace("&", r"\&") return s - substitutions.extend( - [ - ("%{/s:regex_replacement}", regex_escape(sourcepath.replace("\\", "/"))), - ("%{/S:regex_replacement}", regex_escape(sourcedir.replace("\\", "/"))), - ("%{/p:regex_replacement}", regex_escape(sourcedir.replace("\\", "/"))), - ( - "%{/t:regex_replacement}", - regex_escape(tmpBase.replace("\\", "/")) + ".tmp", - ), - ("%{/T:regex_replacement}", regex_escape(tmpDir.replace("\\", "/"))), - ] - ) + path_substitutions = [ + ("s", sourcepath), ("S", sourcedir), ("p", sourcedir), + ("t", tmpName), ("T", tmpDir) + ] + for path_substitution in path_substitutions: + letter = path_substitution[0] + path = path_substitution[1] + + # Original path variant + substitutions.append(("%" + letter, path)) + + # Normalized path separator variant + substitutions.append(("%/" + letter, path.replace("\\", "/"))) + + # realpath variants + # Windows paths with substitute drives are not expanded by default + # as they are used to avoid MAX_PATH issues, but sometimes we do + # need the fully expanded path. + real_path = os.path.realpath(path) + substitutions.append(("%{" + letter + ":real}", real_path)) + substitutions.append(("%{/" + letter + ":real}", + real_path.replace("\\", "/"))) + + # "%{/[STpst]:regex_replacement}" should be normalized like + # "%/[STpst]" but we're also in a regex replacement context + # of a s@@@ regex. + substitutions.append( + ("%{/" + letter + ":regex_replacement}", + regex_escape(path.replace("\\", "/")))) + + # "%:[STpst]" are normalized paths without colons and without + # a leading slash. + substitutions.append(("%:" + letter, colonNormalizePath(path))) - # "%:[STpst]" are normalized paths without colons and without a leading - # slash. - substitutions.extend( - [ - ("%:s", colonNormalizePath(sourcepath)), - ("%:S", colonNormalizePath(sourcedir)), - ("%:p", colonNormalizePath(sourcedir)), - ("%:t", colonNormalizePath(tmpBase + ".tmp")), - ("%:T", colonNormalizePath(tmpDir)), - ] - ) return substitutions diff --git a/llvm/utils/lit/lit/builtin_commands/ diff .py b/llvm/utils/lit/lit/builtin_commands/ diff .py index 3a91920f9b5ed8..fbf4eb0e137b34 100644 --- a/llvm/utils/lit/lit/builtin_commands/ diff .py +++ b/llvm/utils/lit/lit/builtin_commands/ diff .py @@ -281,7 +281,7 @@ def main(argv): try: for file in args: if file != "-" and not os.path.isabs(file): - file = os.path.realpath(os.path.join(os.getcwd(), file)) + file = util.abs_path_preserve_drive(file) if flags.recursive_ diff : if file == "-": diff --git a/llvm/utils/lit/lit/discovery.py b/llvm/utils/lit/lit/discovery.py index a0a2549d8df997..5f96dc9c823c61 100644 --- a/llvm/utils/lit/lit/discovery.py +++ b/llvm/utils/lit/lit/discovery.py @@ -7,7 +7,7 @@ import sys from lit.TestingConfig import TestingConfig -from lit import LitConfig, Test +from lit import LitConfig, Test, util def chooseConfigFileFromDir(dir, config_names): @@ -56,8 +56,8 @@ def search1(path): # configuration to load instead. config_map = litConfig.params.get("config_map") if config_map: - cfgpath = os.path.realpath(cfgpath) - target = config_map.get(os.path.normcase(cfgpath)) + cfgpath = util.abs_path_preserve_drive(cfgpath) + target = config_map.get(cfgpath) if target: cfgpath = target @@ -67,13 +67,13 @@ def search1(path): cfg = TestingConfig.fromdefaults(litConfig) cfg.load_from_path(cfgpath, litConfig) - source_root = os.path.realpath(cfg.test_source_root or path) - exec_root = os.path.realpath(cfg.test_exec_root or path) + source_root = util.abs_path_preserve_drive(cfg.test_source_root or path) + exec_root = util.abs_path_preserve_drive(cfg.test_exec_root or path) return Test.TestSuite(cfg.name, source_root, exec_root, cfg), () def search(path): # Check for an already instantiated test suite. - real_path = os.path.realpath(path) + real_path = util.abs_path_preserve_drive(path) res = cache.get(real_path) if res is None: cache[real_path] = res = search1(path) diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py index 882deb3d03efc5..f2df5c1e032966 100644 --- a/llvm/utils/lit/lit/util.py +++ b/llvm/utils/lit/lit/util.py @@ -127,6 +127,23 @@ def usable_core_count(): return n +def abs_path_preserve_drive(path): + """Return the absolute path without resolving drive mappings on Windows. + + """ + if platform.system() == "Windows": + # Windows has limitations on path length (MAX_PATH) that + # can be worked around using substitute drives, which map + # a drive letter to a longer path on another drive. + # Since Python 3.8, os.path.realpath resolves sustitute drives, + # so we should not use it. In Python 3.7, os.path.realpath + # was implemented as os.path.abspath. + return os.path.normpath(os.path.abspath(path)) + else: + # On UNIX, the current directory always has symbolic links resolved, + # so any program accepting relative paths cannot preserve symbolic + # links in paths and we should always use os.path.realpath. + return os.path.normpath(os.path.realpath(path)) def mkdir(path): try: diff --git a/llvm/utils/lit/tests/Inputs/config-map-discovery/driver.py b/llvm/utils/lit/tests/Inputs/config-map-discovery/driver.py index d22b84d6a15cff..dbd151094d36f6 100644 --- a/llvm/utils/lit/tests/Inputs/config-map-discovery/driver.py +++ b/llvm/utils/lit/tests/Inputs/config-map-discovery/driver.py @@ -2,9 +2,7 @@ import os import sys -main_config = sys.argv[1] -main_config = os.path.realpath(main_config) -main_config = os.path.normcase(main_config) +main_config = lit.util.abs_path_preserve_drive(sys.argv[1]) config_map = {main_config: sys.argv[2]} builtin_parameters = {"config_map": config_map} diff --git a/llvm/utils/lit/tests/Inputs/config-map-discovery/lit.alt.cfg b/llvm/utils/lit/tests/Inputs/config-map-discovery/lit.alt.cfg index c7b303f50a05c8..27860e192fdc18 100644 --- a/llvm/utils/lit/tests/Inputs/config-map-discovery/lit.alt.cfg +++ b/llvm/utils/lit/tests/Inputs/config-map-discovery/lit.alt.cfg @@ -5,5 +5,5 @@ config.suffixes = ['.txt'] config.test_format = lit.formats.ShTest() import os -config.test_exec_root = os.path.realpath(os.path.dirname(__file__)) +config.test_exec_root = os.path.dirname(lit.util.abs_path_preserve_drive(__file__)) config.test_source_root = os.path.join(config.test_exec_root, "tests") diff --git a/llvm/utils/lit/tests/Inputs/use-llvm-tool-required/lit.cfg b/llvm/utils/lit/tests/Inputs/use-llvm-tool-required/lit.cfg index e41207bc2f05da..5062e38c82f14d 100644 --- a/llvm/utils/lit/tests/Inputs/use-llvm-tool-required/lit.cfg +++ b/llvm/utils/lit/tests/Inputs/use-llvm-tool-required/lit.cfg @@ -1,4 +1,5 @@ import lit.formats +import lit.util config.name = "use-llvm-tool-required" config.suffixes = [".txt"] @@ -7,7 +8,7 @@ config.test_source_root = None config.test_exec_root = None import os.path -config.llvm_tools_dir = os.path.realpath(os.path.dirname(__file__)) +config.llvm_tools_dir = os.path.dirname(lit.util.abs_path_preserve_drive(__file__)) import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/llvm/utils/lit/tests/Inputs/use-llvm-tool/lit.cfg b/llvm/utils/lit/tests/Inputs/use-llvm-tool/lit.cfg index 8fe62d98c1349e..2a52db2183edf5 100644 --- a/llvm/utils/lit/tests/Inputs/use-llvm-tool/lit.cfg +++ b/llvm/utils/lit/tests/Inputs/use-llvm-tool/lit.cfg @@ -1,4 +1,5 @@ import lit.formats +import lit.util config.name = "use-llvm-tool" config.suffixes = [".txt"] @@ -7,7 +8,7 @@ config.test_source_root = None config.test_exec_root = None import os.path -this_dir = os.path.realpath(os.path.dirname(__file__)) +this_dir = os.path.dirname(lit.util.abs_path_preserve_drive(__file__)) config.llvm_tools_dir = os.path.join(this_dir, "build") import lit.llvm diff --git a/llvm/utils/llvm-lit/llvm-lit.in b/llvm/utils/llvm-lit/llvm-lit.in index 33ec8017cf05f2..0b6683b6b62300 100755 --- a/llvm/utils/llvm-lit/llvm-lit.in +++ b/llvm/utils/llvm-lit/llvm-lit.in @@ -8,7 +8,7 @@ config_map = {} def map_config(source_dir, site_config): global config_map - source_dir = os.path.realpath(source_dir) + source_dir = os.path.abspath(source_dir) source_dir = os.path.normcase(source_dir) site_config = os.path.normpath(site_config) config_map[source_dir] = site_config _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits