This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-io.git
commit 6dd4c0850cdc0c92a8171efbd98649bb3993a8c8 Author: Gary Gregory <[email protected]> AuthorDate: Fri Sep 3 17:43:23 2021 -0400 Sort members. Refactor common code into private toSeparator(boolean) method. --- .../java/org/apache/commons/io/FilenameUtils.java | 1897 ++++++++++---------- 1 file changed, 953 insertions(+), 944 deletions(-) diff --git a/src/main/java/org/apache/commons/io/FilenameUtils.java b/src/main/java/org/apache/commons/io/FilenameUtils.java index 6edb92c..fd7cd64 100644 --- a/src/main/java/org/apache/commons/io/FilenameUtils.java +++ b/src/main/java/org/apache/commons/io/FilenameUtils.java @@ -106,238 +106,206 @@ public class FilenameUtils { /** * The Unix separator character. */ - private static final char UNIX_SEPARATOR = '/'; + private static final char UNIX_NAME_SEPARATOR = '/'; /** * The Windows separator character. */ - private static final char WINDOWS_SEPARATOR = '\\'; + private static final char WINDOWS_NAME_SEPARATOR = '\\'; /** * The system separator character. */ - private static final char SYSTEM_SEPARATOR = File.separatorChar; + private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar; /** * The separator character that is the opposite of the system separator. */ private static final char OTHER_SEPARATOR; + static { if (isSystemWindows()) { - OTHER_SEPARATOR = UNIX_SEPARATOR; + OTHER_SEPARATOR = UNIX_NAME_SEPARATOR; } else { - OTHER_SEPARATOR = WINDOWS_SEPARATOR; + OTHER_SEPARATOR = WINDOWS_NAME_SEPARATOR; } } - /** - * Instances should NOT be constructed in standard programming. - */ - public FilenameUtils() { - } + private static final Pattern IPV4_PATTERN = + Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$"); - /** - * Determines if Windows file system is in use. - * - * @return true if the system is Windows - */ - static boolean isSystemWindows() { - return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR; - } + private static final int IPV4_MAX_OCTET_VALUE = 255; - /** - * Checks if the character is a separator. - * - * @param ch the character to check - * @return true if it is a separator character - */ - private static boolean isSeparator(final char ch) { - return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR; - } + private static final int IPV6_MAX_HEX_GROUPS = 8; + + private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4; + private static final int MAX_UNSIGNED_SHORT = 0xffff; + + private static final int BASE_16 = 16; + + private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$"); /** - * Normalizes a path, removing double and single dot path steps. + * Concatenates a fileName to a base path using normal command line style rules. * <p> - * This method normalizes a path to a standard format. - * The input may contain separators in either Unix or Windows format. - * The output will contain separators in the format of the system. + * The effect is equivalent to resultant directory after changing + * directory to the first argument, followed by changing directory to + * the second argument. + * </p> * <p> - * A trailing slash will be retained. - * A double slash will be merged to a single slash (but UNC names are handled). - * A single dot path segment will be removed. - * A double dot will cause that path segment and the one before to be removed. - * If the double dot has no parent path segment to work with, {@code null} - * is returned. + * The first argument is the base path, the second is the path to concatenate. + * The returned path is always normalized via {@link #normalize(String)}, + * thus {@code ..} is handled. + * </p> + * <p> + * If {@code pathToAdd} is absolute (has an absolute prefix), then + * it will be normalized and returned. + * Otherwise, the paths will be joined, normalized and returned. + * </p> * <p> * The output will be the same on both Unix and Windows except * for the separator character. + * </p> * <pre> - * /foo// --> /foo/ - * /foo/./ --> /foo/ - * /foo/../bar --> /bar - * /foo/../bar/ --> /bar/ - * /foo/../bar/../baz --> /baz - * //foo//./bar --> /foo/bar - * /../ --> null - * ../foo --> null - * foo/bar/.. --> foo/ - * foo/../../bar --> null - * foo/../bar --> bar - * //server/foo/../bar --> //server/bar - * //server/../bar --> null - * C:\foo\..\bar --> C:\bar - * C:\..\bar --> null - * ~/foo/../bar/ --> ~/bar/ - * ~/../bar --> null + * /foo/ + bar --> /foo/bar + * /foo + bar --> /foo/bar + * /foo + /bar --> /bar + * /foo + C:/bar --> C:/bar + * /foo + C:bar --> C:bar [1] + * /foo/a/ + ../bar --> /foo/bar + * /foo/ + ../../bar --> null + * /foo/ + /bar --> /bar + * /foo/.. + /bar --> /bar + * /foo + bar/c.txt --> /foo/bar/c.txt + * /foo/c.txt + bar --> /foo/c.txt/bar [2] * </pre> - * (Note the file separator returned will be correct for Windows/Unix) + * <p> + * [1] Note that the Windows relative drive prefix is unreliable when + * used with this method. + * </p> + * <p> + * [2] Note that the first parameter must be a path. If it ends with a name, then + * the name will be built into the concatenated path. If this might be a problem, + * use {@link #getFullPath(String)} on the base path argument. + * </p> * - * @param fileName the fileName to normalize, null returns null - * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed + * @param basePath the base path to attach to, always treated as a path + * @param fullFileNameToAdd the fileName (or path) to attach to the base + * @return the concatenated path, or null if invalid. Null bytes inside string will be removed */ - public static String normalize(final String fileName) { - return doNormalize(fileName, SYSTEM_SEPARATOR, true); + public static String concat(final String basePath, final String fullFileNameToAdd) { + final int prefix = getPrefixLength(fullFileNameToAdd); + if (prefix < 0) { + return null; + } + if (prefix > 0) { + return normalize(fullFileNameToAdd); + } + if (basePath == null) { + return null; + } + final int len = basePath.length(); + if (len == 0) { + return normalize(fullFileNameToAdd); + } + final char ch = basePath.charAt(len - 1); + if (isSeparator(ch)) { + return normalize(basePath + fullFileNameToAdd); + } + return normalize(basePath + '/' + fullFileNameToAdd); } + /** - * Normalizes a path, removing double and single dot path steps. - * <p> - * This method normalizes a path to a standard format. - * The input may contain separators in either Unix or Windows format. - * The output will contain separators in the format specified. - * <p> - * A trailing slash will be retained. - * A double slash will be merged to a single slash (but UNC names are handled). - * A single dot path segment will be removed. - * A double dot will cause that path segment and the one before to be removed. - * If the double dot has no parent path segment to work with, {@code null} - * is returned. + * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory). * <p> - * The output will be the same on both Unix and Windows except - * for the separator character. - * <pre> - * /foo// --> /foo/ - * /foo/./ --> /foo/ - * /foo/../bar --> /bar - * /foo/../bar/ --> /bar/ - * /foo/../bar/../baz --> /baz - * //foo//./bar --> /foo/bar - * /../ --> null - * ../foo --> null - * foo/bar/.. --> foo/ - * foo/../../bar --> null - * foo/../bar --> bar - * //server/foo/../bar --> //server/bar - * //server/../bar --> null - * C:\foo\..\bar --> C:\bar - * C:\..\bar --> null - * ~/foo/../bar/ --> ~/bar/ - * ~/../bar --> null - * </pre> - * The output will be the same on both Unix and Windows including - * the separator character. + * The files names are expected to be normalized. + * </p> * - * @param fileName the fileName to normalize, null returns null - * @param unixSeparator {@code true} if a unix separator should - * be used or {@code false} if a windows separator should be used. - * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed - * @since 2.0 + * Edge cases: + * <ul> + * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> + * <li>A directory does not contain itself: return false</li> + * <li>A null child file is not contained in any parent: return false</li> + * </ul> + * + * @param canonicalParent + * the file to consider as the parent. + * @param canonicalChild + * the file to consider as the child. + * @return true is the candidate leaf is under by the specified composite. False otherwise. + * @since 2.2 + * @see FileUtils#directoryContains(File, File) */ - public static String normalize(final String fileName, final boolean unixSeparator) { - final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; - return doNormalize(fileName, separator, true); + public static boolean directoryContains(final String canonicalParent, final String canonicalChild) { + if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) { + return false; + } + + if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { + return false; + } + + final char separator = canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR; + final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator; + + return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator); } /** - * Normalizes a path, removing double and single dot path steps, - * and removing any final directory separator. - * <p> - * This method normalizes a path to a standard format. - * The input may contain separators in either Unix or Windows format. - * The output will contain separators in the format of the system. - * <p> - * A trailing slash will be removed. - * A double slash will be merged to a single slash (but UNC names are handled). - * A single dot path segment will be removed. - * A double dot will cause that path segment and the one before to be removed. - * If the double dot has no parent path segment to work with, {@code null} - * is returned. - * <p> - * The output will be the same on both Unix and Windows except - * for the separator character. - * <pre> - * /foo// --> /foo - * /foo/./ --> /foo - * /foo/../bar --> /bar - * /foo/../bar/ --> /bar - * /foo/../bar/../baz --> /baz - * //foo//./bar --> /foo/bar - * /../ --> null - * ../foo --> null - * foo/bar/.. --> foo - * foo/../../bar --> null - * foo/../bar --> bar - * //server/foo/../bar --> //server/bar - * //server/../bar --> null - * C:\foo\..\bar --> C:\bar - * C:\..\bar --> null - * ~/foo/../bar/ --> ~/bar - * ~/../bar --> null - * </pre> - * (Note the file separator returned will be correct for Windows/Unix) + * Does the work of getting the path. * - * @param fileName the fileName to normalize, null returns null - * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed + * @param fileName the fileName + * @param includeSeparator true to include the end separator + * @return the path */ - public static String normalizeNoEndSeparator(final String fileName) { - return doNormalize(fileName, SYSTEM_SEPARATOR, false); + private static String doGetFullPath(final String fileName, final boolean includeSeparator) { + if (fileName == null) { + return null; + } + final int prefix = getPrefixLength(fileName); + if (prefix < 0) { + return null; + } + if (prefix >= fileName.length()) { + if (includeSeparator) { + return getPrefix(fileName); // add end slash if necessary + } + return fileName; + } + final int index = indexOfLastSeparator(fileName); + if (index < 0) { + return fileName.substring(0, prefix); + } + int end = index + (includeSeparator ? 1 : 0); + if (end == 0) { + end++; + } + return fileName.substring(0, end); } /** - * Normalizes a path, removing double and single dot path steps, - * and removing any final directory separator. - * <p> - * This method normalizes a path to a standard format. - * The input may contain separators in either Unix or Windows format. - * The output will contain separators in the format specified. - * <p> - * A trailing slash will be removed. - * A double slash will be merged to a single slash (but UNC names are handled). - * A single dot path segment will be removed. - * A double dot will cause that path segment and the one before to be removed. - * If the double dot has no parent path segment to work with, {@code null} - * is returned. - * <p> - * The output will be the same on both Unix and Windows including - * the separator character. - * <pre> - * /foo// --> /foo - * /foo/./ --> /foo - * /foo/../bar --> /bar - * /foo/../bar/ --> /bar - * /foo/../bar/../baz --> /baz - * //foo//./bar --> /foo/bar - * /../ --> null - * ../foo --> null - * foo/bar/.. --> foo - * foo/../../bar --> null - * foo/../bar --> bar - * //server/foo/../bar --> //server/bar - * //server/../bar --> null - * C:\foo\..\bar --> C:\bar - * C:\..\bar --> null - * ~/foo/../bar/ --> ~/bar - * ~/../bar --> null - * </pre> - * - * @param fileName the fileName to normalize, null returns null - * @param unixSeparator {@code true} if a unix separator should - * be used or {@code false} if a windows separator should be used. - * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed - * @since 2.0 + * Does the work of getting the path. + * + * @param fileName the fileName + * @param separatorAdd 0 to omit the end separator, 1 to return it + * @return the path. Null bytes inside string will be removed */ - public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) { - final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; - return doNormalize(fileName, separator, false); + private static String doGetPath(final String fileName, final int separatorAdd) { + if (fileName == null) { + return null; + } + final int prefix = getPrefixLength(fileName); + if (prefix < 0) { + return null; + } + final int index = indexOfLastSeparator(fileName); + final int endIndex = index+separatorAdd; + if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) { + return EMPTY_STRING; + } + final String path = fileName.substring(prefix, endIndex); + requireNonNullChars(path); + return path; } /** @@ -368,7 +336,7 @@ public class FilenameUtils { fileName.getChars(0, fileName.length(), array, 0); // fix separators throughout - final char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR; + final char otherSeparator = separator == SYSTEM_NAME_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_NAME_SEPARATOR; for (int i = 0; i < array.length; i++) { if (array[i] == otherSeparator) { array[i] = separator; @@ -447,152 +415,369 @@ public class FilenameUtils { } /** - * Concatenates a fileName to a base path using normal command line style rules. + * Checks whether two fileNames are equal exactly. * <p> - * The effect is equivalent to resultant directory after changing - * directory to the first argument, followed by changing directory to - * the second argument. - * </p> + * No processing is performed on the fileNames other than comparison, + * thus this is merely a null-safe case-sensitive equals. + * + * @param fileName1 the first fileName to query, may be null + * @param fileName2 the second fileName to query, may be null + * @return true if the fileNames are equal, null equals null + * @see IOCase#SENSITIVE + */ + public static boolean equals(final String fileName1, final String fileName2) { + return equals(fileName1, fileName2, false, IOCase.SENSITIVE); + } + + /** + * Checks whether two fileNames are equal, optionally normalizing and providing + * control over the case-sensitivity. + * + * @param fileName1 the first fileName to query, may be null + * @param fileName2 the second fileName to query, may be null + * @param normalized whether to normalize the fileNames + * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive + * @return true if the fileNames are equal, null equals null + * @since 1.3 + */ + public static boolean equals( + String fileName1, String fileName2, + final boolean normalized, IOCase caseSensitivity) { + + if (fileName1 == null || fileName2 == null) { + return fileName1 == null && fileName2 == null; + } + if (normalized) { + fileName1 = normalize(fileName1); + if (fileName1 == null) { + return false; + } + fileName2 = normalize(fileName2); + if (fileName2 == null) { + return false; + } + } + if (caseSensitivity == null) { + caseSensitivity = IOCase.SENSITIVE; + } + return caseSensitivity.checkEquals(fileName1, fileName2); + } + + /** + * Checks whether two fileNames are equal after both have been normalized. * <p> - * The first argument is the base path, the second is the path to concatenate. - * The returned path is always normalized via {@link #normalize(String)}, - * thus {@code ..} is handled. - * </p> + * Both fileNames are first passed to {@link #normalize(String)}. + * The check is then performed in a case-sensitive manner. + * + * @param fileName1 the first fileName to query, may be null + * @param fileName2 the second fileName to query, may be null + * @return true if the fileNames are equal, null equals null + * @see IOCase#SENSITIVE + */ + public static boolean equalsNormalized(final String fileName1, final String fileName2) { + return equals(fileName1, fileName2, true, IOCase.SENSITIVE); + } + + /** + * Checks whether two fileNames are equal after both have been normalized + * and using the case rules of the system. * <p> - * If {@code pathToAdd} is absolute (has an absolute prefix), then - * it will be normalized and returned. - * Otherwise, the paths will be joined, normalized and returned. - * </p> + * Both fileNames are first passed to {@link #normalize(String)}. + * The check is then performed case-sensitive on Unix and + * case-insensitive on Windows. + * + * @param fileName1 the first fileName to query, may be null + * @param fileName2 the second fileName to query, may be null + * @return true if the fileNames are equal, null equals null + * @see IOCase#SYSTEM + */ + public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) { + return equals(fileName1, fileName2, true, IOCase.SYSTEM); + } + + /** + * Checks whether two fileNames are equal using the case rules of the system. * <p> - * The output will be the same on both Unix and Windows except - * for the separator character. + * No processing is performed on the fileNames other than comparison. + * The check is case-sensitive on Unix and case-insensitive on Windows. + * + * @param fileName1 the first fileName to query, may be null + * @param fileName2 the second fileName to query, may be null + * @return true if the fileNames are equal, null equals null + * @see IOCase#SYSTEM + */ + public static boolean equalsOnSystem(final String fileName1, final String fileName2) { + return equals(fileName1, fileName2, false, IOCase.SYSTEM); + } + + /** + * Special handling for NTFS ADS: Don't accept colon in the fileName. + * + * @param fileName a file name + * @return ADS offsets. + */ + private static int getAdsCriticalOffset(final String fileName) { + // Step 1: Remove leading path segments. + final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR); + final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR); + if (offset1 == -1) { + if (offset2 == -1) { + return 0; + } + return offset2 + 1; + } + if (offset2 == -1) { + return offset1 + 1; + } + return Math.max(offset1, offset2) + 1; + } + + /** + * Gets the base name, minus the full path and extension, from a full fileName. + * <p> + * This method will handle a file in either Unix or Windows format. + * The text after the last forward or backslash and before the last dot is returned. + * <pre> + * a/b/c.txt --> c + * a.txt --> a + * a/b/c --> c + * a/b/c/ --> "" + * </pre> + * <p> + * The output will be the same irrespective of the machine that the code is running on. + * + * @param fileName the fileName to query, null returns null + * @return the name of the file without the path, or an empty string if none exists. Null bytes inside string + * will be removed + */ + public static String getBaseName(final String fileName) { + return removeExtension(getName(fileName)); + } + + /** + * Gets the extension of a fileName. + * <p> + * This method returns the textual part of the fileName after the last dot. + * There must be no directory separator after the dot. + * <pre> + * foo.txt --> "txt" + * a/b/c.jpg --> "jpg" + * a/b.txt/c --> "" + * a/b/c --> "" + * </pre> + * <p> + * The output will be the same irrespective of the machine that the code is running on, with the + * exception of a possible {@link IllegalArgumentException} on Windows (see below). * </p> + * <p> + * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". + * In this case, the name wouldn't be the name of a file, but the identifier of an + * alternate data stream (bar.txt) on the file foo.exe. The method used to return + * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing + * an {@link IllegalArgumentException} for names like this. + * + * @param fileName the fileName to retrieve the extension of. + * @return the extension of the file or an empty string if none exists or {@code null} + * if the fileName is {@code null}. + * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact, + * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". + */ + public static String getExtension(final String fileName) throws IllegalArgumentException { + if (fileName == null) { + return null; + } + final int index = indexOfExtension(fileName); + if (index == NOT_FOUND) { + return EMPTY_STRING; + } + return fileName.substring(index + 1); + } + + /** + * Gets the full path from a full fileName, which is the prefix + path. + * <p> + * This method will handle a file in either Unix or Windows format. + * The method is entirely text based, and returns the text before and + * including the last forward or backslash. * <pre> - * /foo/ + bar --> /foo/bar - * /foo + bar --> /foo/bar - * /foo + /bar --> /bar - * /foo + C:/bar --> C:/bar - * /foo + C:bar --> C:bar [1] - * /foo/a/ + ../bar --> /foo/bar - * /foo/ + ../../bar --> null - * /foo/ + /bar --> /bar - * /foo/.. + /bar --> /bar - * /foo + bar/c.txt --> /foo/bar/c.txt - * /foo/c.txt + bar --> /foo/c.txt/bar [2] + * C:\a\b\c.txt --> C:\a\b\ + * ~/a/b/c.txt --> ~/a/b/ + * a.txt --> "" + * a/b/c --> a/b/ + * a/b/c/ --> a/b/c/ + * C: --> C: + * C:\ --> C:\ + * ~ --> ~/ + * ~/ --> ~/ + * ~user --> ~user/ + * ~user/ --> ~user/ + * </pre> + * <p> + * The output will be the same irrespective of the machine that the code is running on. + * + * @param fileName the fileName to query, null returns null + * @return the path of the file, an empty string if none exists, null if invalid + */ + public static String getFullPath(final String fileName) { + return doGetFullPath(fileName, true); + } + + /** + * Gets the full path from a full fileName, which is the prefix + path, + * and also excluding the final directory separator. + * <p> + * This method will handle a file in either Unix or Windows format. + * The method is entirely text based, and returns the text before the + * last forward or backslash. + * <pre> + * C:\a\b\c.txt --> C:\a\b + * ~/a/b/c.txt --> ~/a/b + * a.txt --> "" + * a/b/c --> a/b + * a/b/c/ --> a/b/c + * C: --> C: + * C:\ --> C:\ + * ~ --> ~ + * ~/ --> ~ + * ~user --> ~user + * ~user/ --> ~user * </pre> * <p> - * [1] Note that the Windows relative drive prefix is unreliable when - * used with this method. - * </p> - * <p> - * [2] Note that the first parameter must be a path. If it ends with a name, then - * the name will be built into the concatenated path. If this might be a problem, - * use {@link #getFullPath(String)} on the base path argument. - * </p> + * The output will be the same irrespective of the machine that the code is running on. * - * @param basePath the base path to attach to, always treated as a path - * @param fullFileNameToAdd the fileName (or path) to attach to the base - * @return the concatenated path, or null if invalid. Null bytes inside string will be removed + * @param fileName the fileName to query, null returns null + * @return the path of the file, an empty string if none exists, null if invalid */ - public static String concat(final String basePath, final String fullFileNameToAdd) { - final int prefix = getPrefixLength(fullFileNameToAdd); - if (prefix < 0) { - return null; - } - if (prefix > 0) { - return normalize(fullFileNameToAdd); - } - if (basePath == null) { - return null; - } - final int len = basePath.length(); - if (len == 0) { - return normalize(fullFileNameToAdd); - } - final char ch = basePath.charAt(len - 1); - if (isSeparator(ch)) { - return normalize(basePath + fullFileNameToAdd); - } - return normalize(basePath + '/' + fullFileNameToAdd); + public static String getFullPathNoEndSeparator(final String fileName) { + return doGetFullPath(fileName, false); } /** - * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory). + * Gets the name minus the path from a full fileName. * <p> - * The files names are expected to be normalized. - * </p> - * - * Edge cases: - * <ul> - * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> - * <li>A directory does not contain itself: return false</li> - * <li>A null child file is not contained in any parent: return false</li> - * </ul> + * This method will handle a file in either Unix or Windows format. + * The text after the last forward or backslash is returned. + * <pre> + * a/b/c.txt --> c.txt + * a.txt --> a.txt + * a/b/c --> c + * a/b/c/ --> "" + * </pre> + * <p> + * The output will be the same irrespective of the machine that the code is running on. * - * @param canonicalParent - * the file to consider as the parent. - * @param canonicalChild - * the file to consider as the child. - * @return true is the candidate leaf is under by the specified composite. False otherwise. - * @since 2.2 - * @see FileUtils#directoryContains(File, File) + * @param fileName the fileName to query, null returns null + * @return the name of the file without the path, or an empty string if none exists. + * Null bytes inside string will be removed */ - public static boolean directoryContains(final String canonicalParent, final String canonicalChild) { - if (isEmpty(canonicalParent) || isEmpty(canonicalChild)) { - return false; - } - - if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { - return false; + public static String getName(final String fileName) { + if (fileName == null) { + return null; } - - final char separator = canonicalParent.charAt(0) == UNIX_SEPARATOR ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; - final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator; - - return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator); - } - - private static boolean isEmpty(final String string) { - return string == null || string.isEmpty(); + requireNonNullChars(fileName); + final int index = indexOfLastSeparator(fileName); + return fileName.substring(index + 1); } /** - * Converts all separators to the Unix separator of forward slash. + * Gets the path from a full fileName, which excludes the prefix. + * <p> + * This method will handle a file in either Unix or Windows format. + * The method is entirely text based, and returns the text before and + * including the last forward or backslash. + * <pre> + * C:\a\b\c.txt --> a\b\ + * ~/a/b/c.txt --> a/b/ + * a.txt --> "" + * a/b/c --> a/b/ + * a/b/c/ --> a/b/c/ + * </pre> + * <p> + * The output will be the same irrespective of the machine that the code is running on. + * <p> + * This method drops the prefix from the result. + * See {@link #getFullPath(String)} for the method that retains the prefix. * - * @param path the path to be changed, null ignored - * @return the updated path + * @param fileName the fileName to query, null returns null + * @return the path of the file, an empty string if none exists, null if invalid. + * Null bytes inside string will be removed */ - public static String separatorsToUnix(final String path) { - if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) { - return path; - } - return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR); + public static String getPath(final String fileName) { + return doGetPath(fileName, 1); } /** - * Converts all separators to the Windows separator of backslash. + * Gets the path from a full fileName, which excludes the prefix, and + * also excluding the final directory separator. + * <p> + * This method will handle a file in either Unix or Windows format. + * The method is entirely text based, and returns the text before the + * last forward or backslash. + * <pre> + * C:\a\b\c.txt --> a\b + * ~/a/b/c.txt --> a/b + * a.txt --> "" + * a/b/c --> a/b + * a/b/c/ --> a/b/c + * </pre> + * <p> + * The output will be the same irrespective of the machine that the code is running on. + * <p> + * This method drops the prefix from the result. + * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. * - * @param path the path to be changed, null ignored - * @return the updated path + * @param fileName the fileName to query, null returns null + * @return the path of the file, an empty string if none exists, null if invalid. + * Null bytes inside string will be removed */ - public static String separatorsToWindows(final String path) { - if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) { - return path; - } - return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR); + public static String getPathNoEndSeparator(final String fileName) { + return doGetPath(fileName, 0); } /** - * Converts all separators to the system separator. + * Gets the prefix from a full fileName, such as {@code C:/} + * or {@code ~/}. + * <p> + * This method will handle a file in either Unix or Windows format. + * The prefix includes the first slash in the full fileName where applicable. + * <pre> + * Windows: + * a\b\c.txt --> "" --> relative + * \a\b\c.txt --> "\" --> current drive absolute + * C:a\b\c.txt --> "C:" --> drive relative + * C:\a\b\c.txt --> "C:\" --> absolute + * \\server\a\b\c.txt --> "\\server\" --> UNC * - * @param path the path to be changed, null ignored - * @return the updated path + * Unix: + * a/b/c.txt --> "" --> relative + * /a/b/c.txt --> "/" --> absolute + * ~/a/b/c.txt --> "~/" --> current user + * ~ --> "~/" --> current user (slash added) + * ~user/a/b/c.txt --> "~user/" --> named user + * ~user --> "~user/" --> named user (slash added) + * </pre> + * <p> + * The output will be the same irrespective of the machine that the code is running on. + * ie. both Unix and Windows prefixes are matched regardless. + * + * @param fileName the fileName to query, null returns null + * @return the prefix of the file, null if invalid. Null bytes inside string will be removed */ - public static String separatorsToSystem(final String path) { - if (path == null) { + public static String getPrefix(final String fileName) { + if (fileName == null) { return null; } - return isSystemWindows() ? separatorsToWindows(path) : separatorsToUnix(path); + final int len = getPrefixLength(fileName); + if (len < 0) { + return null; + } + if (len > fileName.length()) { + requireNonNullChars(fileName + UNIX_NAME_SEPARATOR); + return fileName + UNIX_NAME_SEPARATOR; + } + final String path = fileName.substring(0, len); + requireNonNullChars(path); + return path; } /** @@ -653,8 +838,8 @@ public class FilenameUtils { return isSeparator(ch0) ? 1 : 0; } if (ch0 == '~') { - int posUnix = fileName.indexOf(UNIX_SEPARATOR, 1); - int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 1); + int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1); + int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1); if (posUnix == NOT_FOUND && posWin == NOT_FOUND) { return len + 1; // return a length greater than the input } @@ -674,7 +859,7 @@ public class FilenameUtils { } return 3; } - if (ch0 == UNIX_SEPARATOR) { + if (ch0 == UNIX_NAME_SEPARATOR) { return 1; } return NOT_FOUND; @@ -683,8 +868,8 @@ public class FilenameUtils { if (!isSeparator(ch0) || !isSeparator(ch1)) { return isSeparator(ch0) ? 1 : 0; } - int posUnix = fileName.indexOf(UNIX_SEPARATOR, 2); - int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 2); + int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2); + int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2); if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) { return NOT_FOUND; } @@ -696,27 +881,6 @@ public class FilenameUtils { } /** - * Returns the index of the last directory separator character. - * <p> - * This method will handle a file in either Unix or Windows format. - * The position of the last forward or backslash is returned. - * <p> - * The output will be the same irrespective of the machine that the code is running on. - * - * @param fileName the fileName to find the last path separator in, null returns -1 - * @return the index of the last separator character, or -1 if there - * is no such character - */ - public static int indexOfLastSeparator(final String fileName) { - if (fileName == null) { - return NOT_FOUND; - } - final int lastUnixPos = fileName.lastIndexOf(UNIX_SEPARATOR); - final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_SEPARATOR); - return Math.max(lastUnixPos, lastWindowsPos); - } - - /** * Returns the index of the last extension separator character, which is a dot. * <p> * This method also checks that there is no directory separator after the last dot. To do this it uses @@ -755,344 +919,451 @@ public class FilenameUtils { } /** - * Gets the prefix from a full fileName, such as {@code C:/} - * or {@code ~/}. + * Returns the index of the last directory separator character. * <p> * This method will handle a file in either Unix or Windows format. - * The prefix includes the first slash in the full fileName where applicable. - * <pre> - * Windows: - * a\b\c.txt --> "" --> relative - * \a\b\c.txt --> "\" --> current drive absolute - * C:a\b\c.txt --> "C:" --> drive relative - * C:\a\b\c.txt --> "C:\" --> absolute - * \\server\a\b\c.txt --> "\\server\" --> UNC + * The position of the last forward or backslash is returned. + * <p> + * The output will be the same irrespective of the machine that the code is running on. * - * Unix: - * a/b/c.txt --> "" --> relative - * /a/b/c.txt --> "/" --> absolute - * ~/a/b/c.txt --> "~/" --> current user - * ~ --> "~/" --> current user (slash added) - * ~user/a/b/c.txt --> "~user/" --> named user - * ~user --> "~user/" --> named user (slash added) - * </pre> + * @param fileName the fileName to find the last path separator in, null returns -1 + * @return the index of the last separator character, or -1 if there + * is no such character + */ + public static int indexOfLastSeparator(final String fileName) { + if (fileName == null) { + return NOT_FOUND; + } + final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR); + final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR); + return Math.max(lastUnixPos, lastWindowsPos); + } + + private static boolean isEmpty(final String string) { + return string == null || string.isEmpty(); + } + + /** + * Checks whether the extension of the fileName is one of those specified. * <p> - * The output will be the same irrespective of the machine that the code is running on. - * ie. both Unix and Windows prefixes are matched regardless. + * This method obtains the extension as the textual part of the fileName + * after the last dot. There must be no directory separator after the dot. + * The extension check is case-sensitive on all platforms. * - * @param fileName the fileName to query, null returns null - * @return the prefix of the file, null if invalid. Null bytes inside string will be removed + * @param fileName the fileName to query, null returns false + * @param extensions the extensions to check for, null checks for no extension + * @return true if the fileName is one of the extensions + * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes */ - public static String getPrefix(final String fileName) { + public static boolean isExtension(final String fileName, final Collection<String> extensions) { if (fileName == null) { - return null; + return false; } - final int len = getPrefixLength(fileName); - if (len < 0) { - return null; + requireNonNullChars(fileName); + + if (extensions == null || extensions.isEmpty()) { + return indexOfExtension(fileName) == NOT_FOUND; } - if (len > fileName.length()) { - requireNonNullChars(fileName + UNIX_SEPARATOR); - return fileName + UNIX_SEPARATOR; + final String fileExt = getExtension(fileName); + for (final String extension : extensions) { + if (fileExt.equals(extension)) { + return true; + } } - final String path = fileName.substring(0, len); - requireNonNullChars(path); - return path; + return false; } /** - * Gets the path from a full fileName, which excludes the prefix. - * <p> - * This method will handle a file in either Unix or Windows format. - * The method is entirely text based, and returns the text before and - * including the last forward or backslash. - * <pre> - * C:\a\b\c.txt --> a\b\ - * ~/a/b/c.txt --> a/b/ - * a.txt --> "" - * a/b/c --> a/b/ - * a/b/c/ --> a/b/c/ - * </pre> - * <p> - * The output will be the same irrespective of the machine that the code is running on. + * Checks whether the extension of the fileName is that specified. * <p> - * This method drops the prefix from the result. - * See {@link #getFullPath(String)} for the method that retains the prefix. + * This method obtains the extension as the textual part of the fileName + * after the last dot. There must be no directory separator after the dot. + * The extension check is case-sensitive on all platforms. * - * @param fileName the fileName to query, null returns null - * @return the path of the file, an empty string if none exists, null if invalid. - * Null bytes inside string will be removed + * @param fileName the fileName to query, null returns false + * @param extension the extension to check for, null or empty checks for no extension + * @return true if the fileName has the specified extension + * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes */ - public static String getPath(final String fileName) { - return doGetPath(fileName, 1); - } + public static boolean isExtension(final String fileName, final String extension) { + if (fileName == null) { + return false; + } + requireNonNullChars(fileName); - /** - * Gets the path from a full fileName, which excludes the prefix, and - * also excluding the final directory separator. - * <p> - * This method will handle a file in either Unix or Windows format. - * The method is entirely text based, and returns the text before the - * last forward or backslash. - * <pre> - * C:\a\b\c.txt --> a\b - * ~/a/b/c.txt --> a/b - * a.txt --> "" - * a/b/c --> a/b - * a/b/c/ --> a/b/c - * </pre> - * <p> - * The output will be the same irrespective of the machine that the code is running on. - * <p> - * This method drops the prefix from the result. - * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. - * - * @param fileName the fileName to query, null returns null - * @return the path of the file, an empty string if none exists, null if invalid. - * Null bytes inside string will be removed - */ - public static String getPathNoEndSeparator(final String fileName) { - return doGetPath(fileName, 0); + if (isEmpty(extension)) { + return indexOfExtension(fileName) == NOT_FOUND; + } + final String fileExt = getExtension(fileName); + return fileExt.equals(extension); } /** - * Does the work of getting the path. + * Checks whether the extension of the fileName is one of those specified. + * <p> + * This method obtains the extension as the textual part of the fileName + * after the last dot. There must be no directory separator after the dot. + * The extension check is case-sensitive on all platforms. * - * @param fileName the fileName - * @param separatorAdd 0 to omit the end separator, 1 to return it - * @return the path. Null bytes inside string will be removed + * @param fileName the fileName to query, null returns false + * @param extensions the extensions to check for, null checks for no extension + * @return true if the fileName is one of the extensions + * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes */ - private static String doGetPath(final String fileName, final int separatorAdd) { + public static boolean isExtension(final String fileName, final String... extensions) { if (fileName == null) { - return null; + return false; } - final int prefix = getPrefixLength(fileName); - if (prefix < 0) { - return null; + requireNonNullChars(fileName); + + if (extensions == null || extensions.length == 0) { + return indexOfExtension(fileName) == NOT_FOUND; } - final int index = indexOfLastSeparator(fileName); - final int endIndex = index+separatorAdd; - if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) { - return EMPTY_STRING; + final String fileExt = getExtension(fileName); + for (final String extension : extensions) { + if (fileExt.equals(extension)) { + return true; + } } - final String path = fileName.substring(prefix, endIndex); - requireNonNullChars(path); - return path; + return false; } /** - * Gets the full path from a full fileName, which is the prefix + path. - * <p> - * This method will handle a file in either Unix or Windows format. - * The method is entirely text based, and returns the text before and - * including the last forward or backslash. - * <pre> - * C:\a\b\c.txt --> C:\a\b\ - * ~/a/b/c.txt --> ~/a/b/ - * a.txt --> "" - * a/b/c --> a/b/ - * a/b/c/ --> a/b/c/ - * C: --> C: - * C:\ --> C:\ - * ~ --> ~/ - * ~/ --> ~/ - * ~user --> ~user/ - * ~user/ --> ~user/ - * </pre> - * <p> - * The output will be the same irrespective of the machine that the code is running on. + * Checks whether a given string represents a valid IPv4 address. * - * @param fileName the fileName to query, null returns null - * @return the path of the file, an empty string if none exists, null if invalid + * @param name the name to validate + * @return true if the given name is a valid IPv4 address */ - public static String getFullPath(final String fileName) { - return doGetFullPath(fileName, true); + // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address + private static boolean isIPv4Address(final String name) { + final Matcher m = IPV4_PATTERN.matcher(name); + if (!m.matches() || m.groupCount() != 4) { + return false; + } + + // verify that address subgroups are legal + for (int i = 1; i <= 4; i++) { + final String ipSegment = m.group(i); + final int iIpSegment = Integer.parseInt(ipSegment); + if (iIpSegment > IPV4_MAX_OCTET_VALUE) { + return false; + } + + if (ipSegment.length() > 1 && ipSegment.startsWith("0")) { + return false; + } + + } + + return true; } + // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address /** - * Gets the full path from a full fileName, which is the prefix + path, - * and also excluding the final directory separator. - * <p> - * This method will handle a file in either Unix or Windows format. - * The method is entirely text based, and returns the text before the - * last forward or backslash. - * <pre> - * C:\a\b\c.txt --> C:\a\b - * ~/a/b/c.txt --> ~/a/b - * a.txt --> "" - * a/b/c --> a/b - * a/b/c/ --> a/b/c - * C: --> C: - * C:\ --> C:\ - * ~ --> ~ - * ~/ --> ~ - * ~user --> ~user - * ~user/ --> ~user - * </pre> - * <p> - * The output will be the same irrespective of the machine that the code is running on. + * Checks whether a given string represents a valid IPv6 address. * - * @param fileName the fileName to query, null returns null - * @return the path of the file, an empty string if none exists, null if invalid + * @param inet6Address the name to validate + * @return true if the given name is a valid IPv6 address */ - public static String getFullPathNoEndSeparator(final String fileName) { - return doGetFullPath(fileName, false); + private static boolean isIPv6Address(final String inet6Address) { + final boolean containsCompressedZeroes = inet6Address.contains("::"); + if (containsCompressedZeroes && (inet6Address.indexOf("::") != inet6Address.lastIndexOf("::"))) { + return false; + } + if ((inet6Address.startsWith(":") && !inet6Address.startsWith("::")) + || (inet6Address.endsWith(":") && !inet6Address.endsWith("::"))) { + return false; + } + String[] octets = inet6Address.split(":"); + if (containsCompressedZeroes) { + final List<String> octetList = new ArrayList<>(Arrays.asList(octets)); + if (inet6Address.endsWith("::")) { + // String.split() drops ending empty segments + octetList.add(""); + } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) { + octetList.remove(0); + } + octets = octetList.toArray(EMPTY_STRING_ARRAY); + } + if (octets.length > IPV6_MAX_HEX_GROUPS) { + return false; + } + int validOctets = 0; + int emptyOctets = 0; // consecutive empty chunks + for (int index = 0; index < octets.length; index++) { + final String octet = octets[index]; + if (octet.isEmpty()) { + emptyOctets++; + if (emptyOctets > 1) { + return false; + } + } else { + emptyOctets = 0; + // Is last chunk an IPv4 address? + if (index == octets.length - 1 && octet.contains(".")) { + if (!isIPv4Address(octet)) { + return false; + } + validOctets += 2; + continue; + } + if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) { + return false; + } + final int octetInt; + try { + octetInt = Integer.parseInt(octet, BASE_16); + } catch (final NumberFormatException e) { + return false; + } + if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) { + return false; + } + } + validOctets++; + } + return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes); } /** - * Does the work of getting the path. + * Checks whether a given string is a valid host name according to + * RFC 3986 - not accepting IP addresses. * - * @param fileName the fileName - * @param includeSeparator true to include the end separator - * @return the path + * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" + * @param name the hostname to validate + * @return true if the given name is a valid host name */ - private static String doGetFullPath(final String fileName, final boolean includeSeparator) { - if (fileName == null) { - return null; - } - final int prefix = getPrefixLength(fileName); - if (prefix < 0) { - return null; - } - if (prefix >= fileName.length()) { - if (includeSeparator) { - return getPrefix(fileName); // add end slash if necessary + private static boolean isRFC3986HostName(final String name) { + final String[] parts = name.split("\\.", -1); + for (int i = 0; i < parts.length; i++) { + if (parts[i].isEmpty()) { + // trailing dot is legal, otherwise we've hit a .. sequence + return i == parts.length - 1; + } + if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) { + return false; } - return fileName; - } - final int index = indexOfLastSeparator(fileName); - if (index < 0) { - return fileName.substring(0, prefix); - } - int end = index + (includeSeparator ? 1 : 0); - if (end == 0) { - end++; } - return fileName.substring(0, end); + return true; } /** - * Gets the name minus the path from a full fileName. - * <p> - * This method will handle a file in either Unix or Windows format. - * The text after the last forward or backslash is returned. - * <pre> - * a/b/c.txt --> c.txt - * a.txt --> a.txt - * a/b/c --> c - * a/b/c/ --> "" - * </pre> - * <p> - * The output will be the same irrespective of the machine that the code is running on. + * Checks if the character is a separator. * - * @param fileName the fileName to query, null returns null - * @return the name of the file without the path, or an empty string if none exists. - * Null bytes inside string will be removed + * @param ch the character to check + * @return true if it is a separator character */ - public static String getName(final String fileName) { - if (fileName == null) { - return null; - } - requireNonNullChars(fileName); - final int index = indexOfLastSeparator(fileName); - return fileName.substring(index + 1); + private static boolean isSeparator(final char ch) { + return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR; } /** - * Checks the input for null bytes, a sign of unsanitized data being passed to to file level functions. + * Determines if Windows file system is in use. * - * This may be used for poison byte attacks. + * @return true if the system is Windows + */ + static boolean isSystemWindows() { + return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR; + } + + /** + * Checks whether a given string is a valid host name according to + * RFC 3986. * - * @param path the path to check + * <p>Accepted are IP addresses (v4 and v6) as well as what the + * RFC calls a "reg-name". Percent encoded names don't seem to be + * valid names in UNC paths.</p> + * + * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" + * @param name the hostname to validate + * @return true if the given name is a valid host name */ - private static void requireNonNullChars(final String path) { - if (path.indexOf(0) >= 0) { - throw new IllegalArgumentException("Null byte present in file/path name. There are no " - + "known legitimate use cases for such data, but several injection attacks may use it"); - } + private static boolean isValidHostName(final String name) { + return isIPv6Address(name) || isRFC3986HostName(name); } /** - * Gets the base name, minus the full path and extension, from a full fileName. + * Normalizes a path, removing double and single dot path steps. * <p> - * This method will handle a file in either Unix or Windows format. - * The text after the last forward or backslash and before the last dot is returned. + * This method normalizes a path to a standard format. + * The input may contain separators in either Unix or Windows format. + * The output will contain separators in the format of the system. + * <p> + * A trailing slash will be retained. + * A double slash will be merged to a single slash (but UNC names are handled). + * A single dot path segment will be removed. + * A double dot will cause that path segment and the one before to be removed. + * If the double dot has no parent path segment to work with, {@code null} + * is returned. + * <p> + * The output will be the same on both Unix and Windows except + * for the separator character. * <pre> - * a/b/c.txt --> c - * a.txt --> a - * a/b/c --> c - * a/b/c/ --> "" + * /foo// --> /foo/ + * /foo/./ --> /foo/ + * /foo/../bar --> /bar + * /foo/../bar/ --> /bar/ + * /foo/../bar/../baz --> /baz + * //foo//./bar --> /foo/bar + * /../ --> null + * ../foo --> null + * foo/bar/.. --> foo/ + * foo/../../bar --> null + * foo/../bar --> bar + * //server/foo/../bar --> //server/bar + * //server/../bar --> null + * C:\foo\..\bar --> C:\bar + * C:\..\bar --> null + * ~/foo/../bar/ --> ~/bar/ + * ~/../bar --> null * </pre> - * <p> - * The output will be the same irrespective of the machine that the code is running on. + * (Note the file separator returned will be correct for Windows/Unix) * - * @param fileName the fileName to query, null returns null - * @return the name of the file without the path, or an empty string if none exists. Null bytes inside string - * will be removed + * @param fileName the fileName to normalize, null returns null + * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed */ - public static String getBaseName(final String fileName) { - return removeExtension(getName(fileName)); + public static String normalize(final String fileName) { + return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true); } /** - * Gets the extension of a fileName. + * Normalizes a path, removing double and single dot path steps. * <p> - * This method returns the textual part of the fileName after the last dot. - * There must be no directory separator after the dot. + * This method normalizes a path to a standard format. + * The input may contain separators in either Unix or Windows format. + * The output will contain separators in the format specified. + * <p> + * A trailing slash will be retained. + * A double slash will be merged to a single slash (but UNC names are handled). + * A single dot path segment will be removed. + * A double dot will cause that path segment and the one before to be removed. + * If the double dot has no parent path segment to work with, {@code null} + * is returned. + * <p> + * The output will be the same on both Unix and Windows except + * for the separator character. * <pre> - * foo.txt --> "txt" - * a/b/c.jpg --> "jpg" - * a/b.txt/c --> "" - * a/b/c --> "" + * /foo// --> /foo/ + * /foo/./ --> /foo/ + * /foo/../bar --> /bar + * /foo/../bar/ --> /bar/ + * /foo/../bar/../baz --> /baz + * //foo//./bar --> /foo/bar + * /../ --> null + * ../foo --> null + * foo/bar/.. --> foo/ + * foo/../../bar --> null + * foo/../bar --> bar + * //server/foo/../bar --> //server/bar + * //server/../bar --> null + * C:\foo\..\bar --> C:\bar + * C:\..\bar --> null + * ~/foo/../bar/ --> ~/bar/ + * ~/../bar --> null * </pre> + * The output will be the same on both Unix and Windows including + * the separator character. + * + * @param fileName the fileName to normalize, null returns null + * @param unixSeparator {@code true} if a unix separator should + * be used or {@code false} if a windows separator should be used. + * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed + * @since 2.0 + */ + public static String normalize(final String fileName, final boolean unixSeparator) { + return doNormalize(fileName, toSeparator(unixSeparator), true); + } + + /** + * Normalizes a path, removing double and single dot path steps, + * and removing any final directory separator. * <p> - * The output will be the same irrespective of the machine that the code is running on, with the - * exception of a possible {@link IllegalArgumentException} on Windows (see below). - * </p> + * This method normalizes a path to a standard format. + * The input may contain separators in either Unix or Windows format. + * The output will contain separators in the format of the system. * <p> - * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". - * In this case, the name wouldn't be the name of a file, but the identifier of an - * alternate data stream (bar.txt) on the file foo.exe. The method used to return - * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing - * an {@link IllegalArgumentException} for names like this. - * - * @param fileName the fileName to retrieve the extension of. - * @return the extension of the file or an empty string if none exists or {@code null} - * if the fileName is {@code null}. - * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact, - * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". + * A trailing slash will be removed. + * A double slash will be merged to a single slash (but UNC names are handled). + * A single dot path segment will be removed. + * A double dot will cause that path segment and the one before to be removed. + * If the double dot has no parent path segment to work with, {@code null} + * is returned. + * <p> + * The output will be the same on both Unix and Windows except + * for the separator character. + * <pre> + * /foo// --> /foo + * /foo/./ --> /foo + * /foo/../bar --> /bar + * /foo/../bar/ --> /bar + * /foo/../bar/../baz --> /baz + * //foo//./bar --> /foo/bar + * /../ --> null + * ../foo --> null + * foo/bar/.. --> foo + * foo/../../bar --> null + * foo/../bar --> bar + * //server/foo/../bar --> //server/bar + * //server/../bar --> null + * C:\foo\..\bar --> C:\bar + * C:\..\bar --> null + * ~/foo/../bar/ --> ~/bar + * ~/../bar --> null + * </pre> + * (Note the file separator returned will be correct for Windows/Unix) + * + * @param fileName the fileName to normalize, null returns null + * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed */ - public static String getExtension(final String fileName) throws IllegalArgumentException { - if (fileName == null) { - return null; - } - final int index = indexOfExtension(fileName); - if (index == NOT_FOUND) { - return EMPTY_STRING; - } - return fileName.substring(index + 1); + public static String normalizeNoEndSeparator(final String fileName) { + return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false); } /** - * Special handling for NTFS ADS: Don't accept colon in the fileName. + * Normalizes a path, removing double and single dot path steps, + * and removing any final directory separator. + * <p> + * This method normalizes a path to a standard format. + * The input may contain separators in either Unix or Windows format. + * The output will contain separators in the format specified. + * <p> + * A trailing slash will be removed. + * A double slash will be merged to a single slash (but UNC names are handled). + * A single dot path segment will be removed. + * A double dot will cause that path segment and the one before to be removed. + * If the double dot has no parent path segment to work with, {@code null} + * is returned. + * <p> + * The output will be the same on both Unix and Windows including + * the separator character. + * <pre> + * /foo// --> /foo + * /foo/./ --> /foo + * /foo/../bar --> /bar + * /foo/../bar/ --> /bar + * /foo/../bar/../baz --> /baz + * //foo//./bar --> /foo/bar + * /../ --> null + * ../foo --> null + * foo/bar/.. --> foo + * foo/../../bar --> null + * foo/../bar --> bar + * //server/foo/../bar --> //server/bar + * //server/../bar --> null + * C:\foo\..\bar --> C:\bar + * C:\..\bar --> null + * ~/foo/../bar/ --> ~/bar + * ~/../bar --> null + * </pre> * - * @param fileName a file name - * @return ADS offsets. + * @param fileName the fileName to normalize, null returns null + * @param unixSeparator {@code true} if a unix separator should + * be used or {@code false} if a windows separator should be used. + * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed + * @since 2.0 */ - private static int getAdsCriticalOffset(final String fileName) { - // Step 1: Remove leading path segments. - final int offset1 = fileName.lastIndexOf(SYSTEM_SEPARATOR); - final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR); - if (offset1 == -1) { - if (offset2 == -1) { - return 0; - } - return offset2 + 1; - } - if (offset2 == -1) { - return offset1 + 1; - } - return Math.max(offset1, offset2) + 1; + public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) { + return doNormalize(fileName, toSeparator(unixSeparator), false); } /** @@ -1126,186 +1397,107 @@ public class FilenameUtils { } /** - * Checks whether two fileNames are equal exactly. - * <p> - * No processing is performed on the fileNames other than comparison, - * thus this is merely a null-safe case-sensitive equals. - * - * @param fileName1 the first fileName to query, may be null - * @param fileName2 the second fileName to query, may be null - * @return true if the fileNames are equal, null equals null - * @see IOCase#SENSITIVE - */ - public static boolean equals(final String fileName1, final String fileName2) { - return equals(fileName1, fileName2, false, IOCase.SENSITIVE); - } - - /** - * Checks whether two fileNames are equal using the case rules of the system. - * <p> - * No processing is performed on the fileNames other than comparison. - * The check is case-sensitive on Unix and case-insensitive on Windows. - * - * @param fileName1 the first fileName to query, may be null - * @param fileName2 the second fileName to query, may be null - * @return true if the fileNames are equal, null equals null - * @see IOCase#SYSTEM - */ - public static boolean equalsOnSystem(final String fileName1, final String fileName2) { - return equals(fileName1, fileName2, false, IOCase.SYSTEM); - } - - /** - * Checks whether two fileNames are equal after both have been normalized. - * <p> - * Both fileNames are first passed to {@link #normalize(String)}. - * The check is then performed in a case-sensitive manner. + * Checks the input for null bytes, a sign of unsanitized data being passed to to file level functions. * - * @param fileName1 the first fileName to query, may be null - * @param fileName2 the second fileName to query, may be null - * @return true if the fileNames are equal, null equals null - * @see IOCase#SENSITIVE - */ - public static boolean equalsNormalized(final String fileName1, final String fileName2) { - return equals(fileName1, fileName2, true, IOCase.SENSITIVE); - } - - /** - * Checks whether two fileNames are equal after both have been normalized - * and using the case rules of the system. - * <p> - * Both fileNames are first passed to {@link #normalize(String)}. - * The check is then performed case-sensitive on Unix and - * case-insensitive on Windows. + * This may be used for poison byte attacks. * - * @param fileName1 the first fileName to query, may be null - * @param fileName2 the second fileName to query, may be null - * @return true if the fileNames are equal, null equals null - * @see IOCase#SYSTEM + * @param path the path to check */ - public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) { - return equals(fileName1, fileName2, true, IOCase.SYSTEM); + private static void requireNonNullChars(final String path) { + if (path.indexOf(0) >= 0) { + throw new IllegalArgumentException("Null byte present in file/path name. There are no " + + "known legitimate use cases for such data, but several injection attacks may use it"); + } } - /** - * Checks whether two fileNames are equal, optionally normalizing and providing - * control over the case-sensitivity. + * Converts all separators to the system separator. * - * @param fileName1 the first fileName to query, may be null - * @param fileName2 the second fileName to query, may be null - * @param normalized whether to normalize the fileNames - * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive - * @return true if the fileNames are equal, null equals null - * @since 1.3 + * @param path the path to be changed, null ignored + * @return the updated path */ - public static boolean equals( - String fileName1, String fileName2, - final boolean normalized, IOCase caseSensitivity) { - - if (fileName1 == null || fileName2 == null) { - return fileName1 == null && fileName2 == null; - } - if (normalized) { - fileName1 = normalize(fileName1); - if (fileName1 == null) { - return false; - } - fileName2 = normalize(fileName2); - if (fileName2 == null) { - return false; - } - } - if (caseSensitivity == null) { - caseSensitivity = IOCase.SENSITIVE; + public static String separatorsToSystem(final String path) { + if (path == null) { + return null; } - return caseSensitivity.checkEquals(fileName1, fileName2); + return isSystemWindows() ? separatorsToWindows(path) : separatorsToUnix(path); } /** - * Checks whether the extension of the fileName is that specified. - * <p> - * This method obtains the extension as the textual part of the fileName - * after the last dot. There must be no directory separator after the dot. - * The extension check is case-sensitive on all platforms. + * Converts all separators to the Unix separator of forward slash. * - * @param fileName the fileName to query, null returns false - * @param extension the extension to check for, null or empty checks for no extension - * @return true if the fileName has the specified extension - * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes + * @param path the path to be changed, null ignored + * @return the updated path */ - public static boolean isExtension(final String fileName, final String extension) { - if (fileName == null) { - return false; - } - requireNonNullChars(fileName); - - if (isEmpty(extension)) { - return indexOfExtension(fileName) == NOT_FOUND; + public static String separatorsToUnix(final String path) { + if (path == null || path.indexOf(WINDOWS_NAME_SEPARATOR) == NOT_FOUND) { + return path; } - final String fileExt = getExtension(fileName); - return fileExt.equals(extension); + return path.replace(WINDOWS_NAME_SEPARATOR, UNIX_NAME_SEPARATOR); } /** - * Checks whether the extension of the fileName is one of those specified. - * <p> - * This method obtains the extension as the textual part of the fileName - * after the last dot. There must be no directory separator after the dot. - * The extension check is case-sensitive on all platforms. + * Converts all separators to the Windows separator of backslash. * - * @param fileName the fileName to query, null returns false - * @param extensions the extensions to check for, null checks for no extension - * @return true if the fileName is one of the extensions - * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes + * @param path the path to be changed, null ignored + * @return the updated path */ - public static boolean isExtension(final String fileName, final String... extensions) { - if (fileName == null) { - return false; - } - requireNonNullChars(fileName); - - if (extensions == null || extensions.length == 0) { - return indexOfExtension(fileName) == NOT_FOUND; - } - final String fileExt = getExtension(fileName); - for (final String extension : extensions) { - if (fileExt.equals(extension)) { - return true; - } + public static String separatorsToWindows(final String path) { + if (path == null || path.indexOf(UNIX_NAME_SEPARATOR) == NOT_FOUND) { + return path; } - return false; + return path.replace(UNIX_NAME_SEPARATOR, WINDOWS_NAME_SEPARATOR); } - /** - * Checks whether the extension of the fileName is one of those specified. - * <p> - * This method obtains the extension as the textual part of the fileName - * after the last dot. There must be no directory separator after the dot. - * The extension check is case-sensitive on all platforms. + * Splits a string into a number of tokens. + * The text is split by '?' and '*'. + * Where multiple '*' occur consecutively they are collapsed into a single '*'. * - * @param fileName the fileName to query, null returns false - * @param extensions the extensions to check for, null checks for no extension - * @return true if the fileName is one of the extensions - * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes + * @param text the text to split + * @return the array of tokens, never null */ - public static boolean isExtension(final String fileName, final Collection<String> extensions) { - if (fileName == null) { - return false; - } - requireNonNullChars(fileName); + static String[] splitOnTokens(final String text) { + // used by wildcardMatch + // package level so a unit test may run on this - if (extensions == null || extensions.isEmpty()) { - return indexOfExtension(fileName) == NOT_FOUND; + if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { + return new String[] { text }; } - final String fileExt = getExtension(fileName); - for (final String extension : extensions) { - if (fileExt.equals(extension)) { - return true; + + final char[] array = text.toCharArray(); + final ArrayList<String> list = new ArrayList<>(); + final StringBuilder buffer = new StringBuilder(); + char prevChar = 0; + for (final char ch : array) { + if (ch == '?' || ch == '*') { + if (buffer.length() != 0) { + list.add(buffer.toString()); + buffer.setLength(0); + } + if (ch == '?') { + list.add("?"); + } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*' + list.add("*"); + } + } else { + buffer.append(ch); } + prevChar = ch; + } + if (buffer.length() != 0) { + list.add(buffer.toString()); } - return false; - } + return list.toArray(EMPTY_STRING_ARRAY); + } + /** + * Returns '/' if given true, '\\' otherwise. + * + * @param unixSeparator which separator to return. + * @return '/' if given true, '\\' otherwise. + */ + private static char toSeparator(final boolean unixSeparator) { + return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR; + } /** * Checks a fileName to see if it matches the specified wildcard matcher, * always testing case-sensitive. @@ -1334,32 +1526,6 @@ public class FilenameUtils { /** * Checks a fileName to see if it matches the specified wildcard matcher - * using the case rules of the system. - * <p> - * The wildcard matcher uses the characters '?' and '*' to represent a - * single or multiple (zero or more) wildcard characters. - * This is the same as often found on Dos/Unix command lines. - * The check is case-sensitive on Unix and case-insensitive on Windows. - * <pre> - * wildcardMatch("c.txt", "*.txt") --> true - * wildcardMatch("c.txt", "*.jpg") --> false - * wildcardMatch("a/b/c.txt", "a/b/*") --> true - * wildcardMatch("c.txt", "*.???") --> true - * wildcardMatch("c.txt", "*.????") --> false - * </pre> - * N.B. the sequence "*?" does not work properly at present in match strings. - * - * @param fileName the fileName to match on - * @param wildcardMatcher the wildcard string to match against - * @return true if the fileName matches the wildcard string - * @see IOCase#SYSTEM - */ - public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) { - return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM); - } - - /** - * Checks a fileName to see if it matches the specified wildcard matcher * allowing control over case-sensitivity. * <p> * The wildcard matcher uses the characters '?' and '*' to represent a @@ -1453,191 +1619,34 @@ public class FilenameUtils { } /** - * Splits a string into a number of tokens. - * The text is split by '?' and '*'. - * Where multiple '*' occur consecutively they are collapsed into a single '*'. - * - * @param text the text to split - * @return the array of tokens, never null - */ - static String[] splitOnTokens(final String text) { - // used by wildcardMatch - // package level so a unit test may run on this - - if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { - return new String[] { text }; - } - - final char[] array = text.toCharArray(); - final ArrayList<String> list = new ArrayList<>(); - final StringBuilder buffer = new StringBuilder(); - char prevChar = 0; - for (final char ch : array) { - if (ch == '?' || ch == '*') { - if (buffer.length() != 0) { - list.add(buffer.toString()); - buffer.setLength(0); - } - if (ch == '?') { - list.add("?"); - } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*' - list.add("*"); - } - } else { - buffer.append(ch); - } - prevChar = ch; - } - if (buffer.length() != 0) { - list.add(buffer.toString()); - } - - return list.toArray(EMPTY_STRING_ARRAY); - } - - /** - * Checks whether a given string is a valid host name according to - * RFC 3986. - * - * <p>Accepted are IP addresses (v4 and v6) as well as what the - * RFC calls a "reg-name". Percent encoded names don't seem to be - * valid names in UNC paths.</p> - * - * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" - * @param name the hostname to validate - * @return true if the given name is a valid host name - */ - private static boolean isValidHostName(final String name) { - return isIPv6Address(name) || isRFC3986HostName(name); - } - - private static final Pattern IPV4_PATTERN = - Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$"); - private static final int IPV4_MAX_OCTET_VALUE = 255; - - /** - * Checks whether a given string represents a valid IPv4 address. - * - * @param name the name to validate - * @return true if the given name is a valid IPv4 address - */ - // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address - private static boolean isIPv4Address(final String name) { - final Matcher m = IPV4_PATTERN.matcher(name); - if (!m.matches() || m.groupCount() != 4) { - return false; - } - - // verify that address subgroups are legal - for (int i = 1; i <= 4; i++) { - final String ipSegment = m.group(i); - final int iIpSegment = Integer.parseInt(ipSegment); - if (iIpSegment > IPV4_MAX_OCTET_VALUE) { - return false; - } - - if (ipSegment.length() > 1 && ipSegment.startsWith("0")) { - return false; - } - - } - - return true; - } - - private static final int IPV6_MAX_HEX_GROUPS = 8; - private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4; - private static final int MAX_UNSIGNED_SHORT = 0xffff; - private static final int BASE_16 = 16; - - // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address - /** - * Checks whether a given string represents a valid IPv6 address. + * Checks a fileName to see if it matches the specified wildcard matcher + * using the case rules of the system. + * <p> + * The wildcard matcher uses the characters '?' and '*' to represent a + * single or multiple (zero or more) wildcard characters. + * This is the same as often found on Dos/Unix command lines. + * The check is case-sensitive on Unix and case-insensitive on Windows. + * <pre> + * wildcardMatch("c.txt", "*.txt") --> true + * wildcardMatch("c.txt", "*.jpg") --> false + * wildcardMatch("a/b/c.txt", "a/b/*") --> true + * wildcardMatch("c.txt", "*.???") --> true + * wildcardMatch("c.txt", "*.????") --> false + * </pre> + * N.B. the sequence "*?" does not work properly at present in match strings. * - * @param inet6Address the name to validate - * @return true if the given name is a valid IPv6 address + * @param fileName the fileName to match on + * @param wildcardMatcher the wildcard string to match against + * @return true if the fileName matches the wildcard string + * @see IOCase#SYSTEM */ - private static boolean isIPv6Address(final String inet6Address) { - final boolean containsCompressedZeroes = inet6Address.contains("::"); - if (containsCompressedZeroes && (inet6Address.indexOf("::") != inet6Address.lastIndexOf("::"))) { - return false; - } - if ((inet6Address.startsWith(":") && !inet6Address.startsWith("::")) - || (inet6Address.endsWith(":") && !inet6Address.endsWith("::"))) { - return false; - } - String[] octets = inet6Address.split(":"); - if (containsCompressedZeroes) { - final List<String> octetList = new ArrayList<>(Arrays.asList(octets)); - if (inet6Address.endsWith("::")) { - // String.split() drops ending empty segments - octetList.add(""); - } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) { - octetList.remove(0); - } - octets = octetList.toArray(EMPTY_STRING_ARRAY); - } - if (octets.length > IPV6_MAX_HEX_GROUPS) { - return false; - } - int validOctets = 0; - int emptyOctets = 0; // consecutive empty chunks - for (int index = 0; index < octets.length; index++) { - final String octet = octets[index]; - if (octet.isEmpty()) { - emptyOctets++; - if (emptyOctets > 1) { - return false; - } - } else { - emptyOctets = 0; - // Is last chunk an IPv4 address? - if (index == octets.length - 1 && octet.contains(".")) { - if (!isIPv4Address(octet)) { - return false; - } - validOctets += 2; - continue; - } - if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) { - return false; - } - final int octetInt; - try { - octetInt = Integer.parseInt(octet, BASE_16); - } catch (final NumberFormatException e) { - return false; - } - if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) { - return false; - } - } - validOctets++; - } - return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes); + public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) { + return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM); } - private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$"); - /** - * Checks whether a given string is a valid host name according to - * RFC 3986 - not accepting IP addresses. - * - * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" - * @param name the hostname to validate - * @return true if the given name is a valid host name + * Instances should NOT be constructed in standard programming. */ - private static boolean isRFC3986HostName(final String name) { - final String[] parts = name.split("\\.", -1); - for (int i = 0; i < parts.length; i++) { - if (parts[i].isEmpty()) { - // trailing dot is legal, otherwise we've hit a .. sequence - return i == parts.length - 1; - } - if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) { - return false; - } - } - return true; + public FilenameUtils() { } }
