This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch maven-4.0.x in repository https://gitbox.apache.org/repos/asf/maven.git
commit 3dd76d0e4863386589276f4be44e9f184b522506 Author: Martin Desruisseaux <[email protected]> AuthorDate: Mon Dec 15 12:26:48 2025 +0100 FileSelector.matches(Path) sometime wrong for a file or a directory (#11551) * Brace expansion must be applied also to the "/**" path suffix. * Detection of "match all" pattern must take brace expansion in account. * Optimization for excluding whole directories should be more conservative. * Create the directory matchers only if requested and return a more direct `PathMatcher` for directories. --- .../maven/impl/DefaultPathMatcherFactory.java | 4 +- .../java/org/apache/maven/impl/PathSelector.java | 230 ++++++++++++--------- .../maven/impl/DefaultPathMatcherFactoryTest.java | 29 +++ 3 files changed, 162 insertions(+), 101 deletions(-) diff --git a/impl/maven-impl/src/main/java/org/apache/maven/impl/DefaultPathMatcherFactory.java b/impl/maven-impl/src/main/java/org/apache/maven/impl/DefaultPathMatcherFactory.java index 83d1919385..f26f6cde06 100644 --- a/impl/maven-impl/src/main/java/org/apache/maven/impl/DefaultPathMatcherFactory.java +++ b/impl/maven-impl/src/main/java/org/apache/maven/impl/DefaultPathMatcherFactory.java @@ -66,9 +66,7 @@ public PathMatcher createExcludeOnlyMatcher( @Override public PathMatcher deriveDirectoryMatcher(@Nonnull PathMatcher fileMatcher) { if (Objects.requireNonNull(fileMatcher) instanceof PathSelector selector) { - if (selector.canFilterDirectories()) { - return selector::couldHoldSelected; - } + return selector.createDirectoryMatcher(); } return PathSelector.INCLUDES_ALL; } diff --git a/impl/maven-impl/src/main/java/org/apache/maven/impl/PathSelector.java b/impl/maven-impl/src/main/java/org/apache/maven/impl/PathSelector.java index a8cc3da2ec..0f3d1a3c12 100644 --- a/impl/maven-impl/src/main/java/org/apache/maven/impl/PathSelector.java +++ b/impl/maven-impl/src/main/java/org/apache/maven/impl/PathSelector.java @@ -29,7 +29,6 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; -import java.util.Set; import org.apache.maven.api.annotations.Nonnull; @@ -49,9 +48,10 @@ * <ul> * <li>The platform-specific separator ({@code '\\'} on Windows) is replaced by {@code '/'}. * Note that it means that the backslash cannot be used for escaping characters.</li> - * <li>Trailing {@code "/"} is completed as {@code "/**"}.</li> - * <li>The {@code "**"} wildcard means "0 or more directories" instead of "1 or more directories". - * This is implemented by adding variants of the pattern without the {@code "**"} wildcard.</li> + * <li>Trailing {@code "/"} is completed as {@value #WILDCARD_FOR_ANY_SUFFIX}.</li> + * <li>The Maven {@code "**"} wildcard means "0 or more directories" instead of "1 or more directories". + * The Maven behavior is implemented with the {@value #WILDCARD_FOR_ANY_PREFIX} or + * {@value #WILDCARD_FOR_ANY_SUFFIX} wildcard, depending where the wildcard appears.</li> * <li>Bracket characters [ ] and { } are escaped.</li> * <li>On Unix only, the escape character {@code '\\'} is itself escaped.</li> * </ul> @@ -158,6 +158,18 @@ final class PathSelector implements PathMatcher { */ private static final String SPECIAL_CHARACTERS = "*?[]{}\\"; + /** + * The wildcard used by the "glob" syntax for meaning zero or more leading directories. + * It cannot be {@code "**/"} because that wildcard matches one or more directories. + */ + private static final String WILDCARD_FOR_ANY_PREFIX = "{**/,}"; + + /** + * The wildcard used by the "glob" syntax for meaning zero or more trailing directories. + * It cannot be {@code "/**"} because that wildcard matches one or more directories. + */ + private static final String WILDCARD_FOR_ANY_SUFFIX = "{/**,}"; + /** * A path matcher which accepts all files. * @@ -196,21 +208,6 @@ final class PathSelector implements PathMatcher { */ private final PathMatcher[] excludes; - /** - * The matcher for all directories to include. This array includes the parents of all those directories, - * because they need to be accepted before we can walk to the sub-directories. - * This is an optimization for skipping whole directories when possible. - * An empty array means to include all directories. - */ - private final PathMatcher[] dirIncludes; - - /** - * The matcher for directories to exclude. This array does <em>not</em> include the parent directories, - * because they may contain other sub-trees that need to be included. - * This is an optimization for skipping whole directories when possible. - */ - private final PathMatcher[] dirExcludes; - /** * The base directory. All files will be relativized to that directory before to be matched. */ @@ -243,8 +240,6 @@ private PathSelector( FileSystem fileSystem = baseDirectory.getFileSystem(); this.includes = matchers(fileSystem, includePatterns); this.excludes = matchers(fileSystem, excludePatterns); - dirIncludes = matchers(fileSystem, directoryPatterns(includePatterns, false)); - dirExcludes = matchers(fileSystem, directoryPatterns(excludePatterns, true)); needRelativize = needRelativize(includePatterns) || needRelativize(excludePatterns); } @@ -461,69 +456,20 @@ private static String[] normalizePatterns(final Collection<String> patterns, fin // Transform ** patterns to use brace expansion for POSIX behavior // This replaces the complex addPatternsWithOneDirRemoved logic // We perform this after escaping so that only these injected braces participate in expansion - pattern = pattern.replace("**/", "{**/,}"); - + pattern = pattern.replace("**/", WILDCARD_FOR_ANY_PREFIX); + if (pattern.endsWith("/**")) { + pattern = pattern.substring(0, pattern.length() - 3) + WILDCARD_FOR_ANY_SUFFIX; + } normalized.add(DEFAULT_SYNTAX + pattern); } else { normalized.add(pattern); } } } - return simplify(normalized, excludes); - } - - /** - * Applies some heuristic rules for simplifying the set of patterns, - * then returns the patterns as an array. - * - * @param patterns the patterns to simplify and return as an array - * @param excludes whether the patterns are exclude patterns - * @return the set content as an array, after simplification - */ - private static String[] simplify(Set<String> patterns, boolean excludes) { - /* - * If the "**" pattern is present, it makes all other patterns useless. - * In the case of include patterns, an empty set means to include everything. - */ - if (patterns.remove("**")) { - patterns.clear(); - if (excludes) { - patterns.add("**"); - } - } - return patterns.toArray(String[]::new); - } - - /** - * Eventually adds the parent directory of the given patterns, without duplicated values. - * The patterns given to this method should have been normalized. - * - * @param patterns the normalized include or exclude patterns - * @param excludes whether the patterns are exclude patterns - * @return patterns of directories to include or exclude - */ - private static String[] directoryPatterns(final String[] patterns, final boolean excludes) { - // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. - final var directories = new LinkedHashSet<String>(patterns.length); - for (String pattern : patterns) { - if (pattern.startsWith(DEFAULT_SYNTAX)) { - if (excludes) { - if (pattern.endsWith("/**")) { - directories.add(pattern.substring(0, pattern.length() - 3)); - } - } else { - int s = pattern.indexOf(':'); - if (pattern.regionMatches(++s, "**/", 0, 3)) { - s = pattern.indexOf('/', s + 3); - if (s < 0) { - return new String[0]; // Pattern is "**", so we need to accept everything. - } - directories.add(pattern.substring(0, s)); - } - } - } + if (!excludes && normalized.contains(DEFAULT_SYNTAX + WILDCARD_FOR_ANY_PREFIX)) { + return new String[0]; // Include everything. } - return simplify(directories, excludes); + return normalized.toArray(String[]::new); } /** @@ -534,7 +480,7 @@ private static String[] directoryPatterns(final String[] patterns, final boolean */ private static boolean needRelativize(String[] patterns) { for (String pattern : patterns) { - if (!pattern.startsWith(DEFAULT_SYNTAX + "**/")) { + if (!pattern.startsWith(DEFAULT_SYNTAX + WILDCARD_FOR_ANY_PREFIX)) { return true; } } @@ -554,15 +500,18 @@ private static PathMatcher[] matchers(final FileSystem fs, final String[] patter } /** - * {@return a potentially simpler matcher equivalent to this matcher}. + * {@return a potentially simpler matcher equivalent to this matcher} */ @SuppressWarnings("checkstyle:MissingSwitchDefault") private PathMatcher simplify() { - if (!needRelativize && excludes.length == 0) { + if (excludes.length == 0) { switch (includes.length) { case 0: return INCLUDES_ALL; case 1: + if (needRelativize) { + break; + } return includes[0]; } } @@ -598,30 +547,115 @@ private static boolean isMatched(Path path, PathMatcher[] matchers) { } /** - * Returns whether {@link #couldHoldSelected(Path)} may return {@code false} for some directories. - * This method can be used to determine if directory filtering optimization is possible. - * - * @return {@code true} if directory filtering is possible, {@code false} if all directories - * will be considered as potentially containing selected files + * Returns a matcher that can be used for pre-filtering the directories. + * The returned matcher can be used as an optimization for skipping whole directories when possible. + * If there is no such optimization, then this method returns {@link #INCLUDES_ALL}. */ - boolean canFilterDirectories() { - return dirIncludes.length != 0 || dirExcludes.length != 0; + PathMatcher createDirectoryMatcher() { + return new DirectoryPrefiltering().simplify(); } /** - * Determines whether a directory could contain selected paths. - * - * @param directory the directory pathname to test, must not be {@code null} - * @return {@code true} if the given directory might contain selected paths, {@code false} if the - * directory will definitively not contain selected paths + * A matcher for skipping whole directories when possible. */ - public boolean couldHoldSelected(Path directory) { - if (baseDirectory.equals(directory)) { - return true; + private final class DirectoryPrefiltering implements PathMatcher { + /** + * Suffixes of patterns matching a whole directory. + */ + private static final String[] SUFFIXES = {WILDCARD_FOR_ANY_SUFFIX, "/**"}; + + /** + * Matchers for directories that can safely be skipped fully. + */ + private final PathMatcher[] dirExcludes; + + /** + * Whether to ignore the includes defined by the enclosing class. + * This flag can be {@code false} if we determined that all includes are applicable to directories. + * This flag should be {@code true} in case of doubt since directory filtering is only an optimization. + */ + private final boolean ignoreIncludes; + + /** + * Creates a new matcher for directories. + */ + @SuppressWarnings("StringEquality") + DirectoryPrefiltering() { + final var excludeDirPatterns = new LinkedHashSet<String>(); + for (String pattern : excludePatterns) { + String directory = trimSuffixes(pattern); + if (directory != pattern) { // Identity comparison is sufficient here. + excludeDirPatterns.add(directory); + } + } + if (excludeDirPatterns.contains(DEFAULT_SYNTAX)) { + // A pattern was something like "glob:{/**,}", which exclude everything. + dirExcludes = new PathMatcher[] {INCLUDES_ALL}; + ignoreIncludes = true; + return; + } + dirExcludes = matchers(baseDirectory.getFileSystem(), excludeDirPatterns.toArray(String[]::new)); + for (String pattern : includePatterns) { + if (trimSuffixes(pattern) == pattern) { // Identity comparison is sufficient here. + ignoreIncludes = true; + return; + } + } + ignoreIncludes = (includes.length == 0); + } + + /** + * If the given pattern matches everything (files and sub-directories) in a directory, + * returns the pattern without the "match all" suffix. + * Otherwise returns {@code pattern}. + */ + private static String trimSuffixes(String pattern) { + if (pattern.startsWith(DEFAULT_SYNTAX)) { + // This algorithm is not really exhaustive, but it is probably not worth to be stricter. + for (String suffix : SUFFIXES) { + while (pattern.endsWith(suffix)) { + pattern = pattern.substring(0, pattern.length() - suffix.length()); + } + } + } + return pattern; + } + + /** + * {@return a potentially simpler matcher equivalent to this matcher} + */ + PathMatcher simplify() { + if (dirExcludes.length == 0) { + if (ignoreIncludes) { + return INCLUDES_ALL; + } + if (includes.length == 1) { + return includes[0]; + } + } + return this; + } + + /** + * Determines whether a directory could contain selected paths. + * + * @param directory the directory pathname to test, must not be {@code null} + * @return {@code true} if the given directory might contain selected paths, {@code false} if the + * directory will definitively not contain selected paths + */ + @Override + public boolean matches(Path directory) { + if (baseDirectory.equals(directory)) { + return true; + } + if (needRelativize) { + directory = baseDirectory.relativize(directory); + } + if (isMatched(directory, dirExcludes)) { + return false; + } + return ignoreIncludes || isMatched(directory, includes); } - directory = baseDirectory.relativize(directory); - return (dirIncludes.length == 0 || isMatched(directory, dirIncludes)) - && (dirExcludes.length == 0 || !isMatched(directory, dirExcludes)); } /** diff --git a/impl/maven-impl/src/test/java/org/apache/maven/impl/DefaultPathMatcherFactoryTest.java b/impl/maven-impl/src/test/java/org/apache/maven/impl/DefaultPathMatcherFactoryTest.java index 964b2b6b9f..63c04c129c 100644 --- a/impl/maven-impl/src/test/java/org/apache/maven/impl/DefaultPathMatcherFactoryTest.java +++ b/impl/maven-impl/src/test/java/org/apache/maven/impl/DefaultPathMatcherFactoryTest.java @@ -235,4 +235,33 @@ public void testDeriveDirectoryMatcher(@TempDir Path tempDir) throws IOException assertTrue(dirMatcher4.matches(subDir) || !dirMatcher4.matches(subDir)); // Always true, just testing it doesn't throw } + + /** + * Verifies that the directory matcher accepts the {@code "foo"} directory (at root) + * when using the {@code "**/*foo*/**"} include pattern. + * Of course, the {@code "org/foo"} directory must also be accepted. + */ + @Test + public void testWildcardMatchesAlsoZeroDirectory() { + Path dir = Path.of("/tmp"); // We will not really create any file. + + // We need two patterns for preventing `PathSelector` to discard itself as an optimization. + PathMatcher anyMatcher = factory.createPathMatcher(dir, List.of("**/*foo*/**", "dummy/**"), null, false); + PathMatcher dirMatcher = factory.deriveDirectoryMatcher(anyMatcher); + + assertTrue(dirMatcher.matches(dir.resolve(Path.of("foo")))); + assertTrue(anyMatcher.matches(dir.resolve(Path.of("foo")))); + assertTrue(dirMatcher.matches(dir.resolve(Path.of("org", "foo")))); + assertTrue(anyMatcher.matches(dir.resolve(Path.of("org", "foo")))); + assertTrue(dirMatcher.matches(dir.resolve(Path.of("foo", "more")))); + assertTrue(anyMatcher.matches(dir.resolve(Path.of("foo", "more")))); + assertTrue(dirMatcher.matches(dir.resolve(Path.of("org", "foo", "more")))); + assertTrue(anyMatcher.matches(dir.resolve(Path.of("org", "foo", "more")))); + assertTrue(dirMatcher.matches(dir.resolve(Path.of("org", "0foo0", "more")))); + assertTrue(anyMatcher.matches(dir.resolve(Path.of("org", "0foo0", "more")))); + assertFalse(dirMatcher.matches(dir.resolve(Path.of("org", "bar", "more")))); + assertFalse(anyMatcher.matches(dir.resolve(Path.of("org", "bar", "more")))); + assertFalse(dirMatcher.matches(dir.resolve(Path.of("bar")))); + assertFalse(anyMatcher.matches(dir.resolve(Path.of("bar")))); + } }
