This is an automated email from the ASF dual-hosted git repository. sebb pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-validator.git
The following commit(s) were added to refs/heads/master by this push: new f8e4d12 VALIDATOR-283 and VALIDATOR-472 f8e4d12 is described below commit f8e4d12aba0872b1f82fd26729a4a23b1bb5fc9e Author: Sebb <s...@apache.org> AuthorDate: Mon Jul 27 22:52:39 2020 +0100 VALIDATOR-283 and VALIDATOR-472 --- src/changes/changes.xml | 6 ++ .../commons/validator/routines/UrlValidator.java | 71 +++------------------- .../validator/routines/UrlValidatorTest.java | 18 +++++- 3 files changed, 31 insertions(+), 64 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 7eba2d1..1326489 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -99,6 +99,12 @@ Apache Commons Validator. For the current list of dependencies, please see http://commons.apache.org/validator/dependencies.html "> + <action issue="VALIDATOR-472" type="fix" dev="sebb"> + UrlValidator should not be more lax than java.net.URI + </action> + <action issue="VALIDATOR-283" type="fix" dev="sebb" due-to="RC Johnson"> + URLValidator should check for illegal Hex characters + </action> <action issue="VALIDATOR-445" type="fix" dev="sebb" due-to="devson"> Inet6Address may also contain a scope id </action> diff --git a/src/main/java/org/apache/commons/validator/routines/UrlValidator.java b/src/main/java/org/apache/commons/validator/routines/UrlValidator.java index 29baa47..ac84762 100644 --- a/src/main/java/org/apache/commons/validator/routines/UrlValidator.java +++ b/src/main/java/org/apache/commons/validator/routines/UrlValidator.java @@ -105,30 +105,6 @@ public class UrlValidator implements Serializable { public static final long ALLOW_LOCAL_URLS = 1 << 3; // CHECKSTYLE IGNORE MagicNumber /** - * This expression derived/taken from the BNF for URI (RFC2396). - */ - private static final String URL_REGEX = - "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; - // 12 3 4 5 6 7 8 9 - private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX); - - /** - * Schema/Protocol (ie. http:, ftp:, file:, etc). - */ - private static final int PARSE_URL_SCHEME = 2; - - /** - * Includes hostname/ip and port number. - */ - private static final int PARSE_URL_AUTHORITY = 4; - - private static final int PARSE_URL_PATH = 5; - - private static final int PARSE_URL_QUERY = 7; - - private static final int PARSE_URL_FRAGMENT = 9; - - /** * Protocol scheme (e.g. http, ftp, https). */ private static final String SCHEME_REGEX = "^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*"; @@ -301,18 +277,20 @@ public class UrlValidator implements Serializable { return false; } - // Check the whole url address structure - Matcher urlMatcher = URL_PATTERN.matcher(value); - if (!urlMatcher.matches()) { + URI uri; // ensure value is a valid URI + try { + uri = new URI(value); + } catch (URISyntaxException e) { return false; } + // OK, perfom additional validation - String scheme = urlMatcher.group(PARSE_URL_SCHEME); + String scheme = uri.getScheme(); if (!isValidScheme(scheme)) { return false; } - String authority = urlMatcher.group(PARSE_URL_AUTHORITY); + String authority = uri.getRawAuthority(); if ("file".equals(scheme) && (authority == null || "".equals(authority))) {// Special case - file: allows an empty authority return true; // this is a local file - nothing more to do here } else if ("file".equals(scheme) && authority != null && authority.contains(":")) { @@ -324,15 +302,15 @@ public class UrlValidator implements Serializable { } } - if (!isValidPath(urlMatcher.group(PARSE_URL_PATH))) { + if (!isValidPath(uri.getRawPath())) { return false; } - if (!isValidQuery(urlMatcher.group(PARSE_URL_QUERY))) { + if (!isValidQuery(uri.getRawQuery())) { return false; } - if (!isValidFragment(urlMatcher.group(PARSE_URL_FRAGMENT))) { + if (!isValidFragment(uri.getRawFragment())) { return false; } @@ -536,17 +514,11 @@ public class UrlValidator implements Serializable { return (options & flag) == 0; } - // Unit test access to pattern matcher - Matcher matchURL(String value) { - return URL_PATTERN.matcher(value); - } - /** * Validator for checking URL parsing * @param args - URLs to validate */ public static void main(String[] args) { - UrlValidator val = new UrlValidator(new String[] { "file", "http", "https" }, UrlValidator.ALLOW_LOCAL_URLS); for(String arg: args) { try { URI uri = new URI(arg); @@ -567,29 +539,6 @@ public class UrlValidator implements Serializable { } catch (URISyntaxException e) { System.out.println(e.getMessage()); } - Matcher m = val.matchURL(arg); - if (m.matches()) { - System.out.printf("%s has %d parts%n",arg,m.groupCount()); - for(int i=1;i <m.groupCount(); i++) { - String grp = m.group(i); - if (grp != null) { - System.out.printf("%d: %s%n",i, grp); - } - } - String authority = m.group(PARSE_URL_AUTHORITY); - String path = m.group(PARSE_URL_PATH); - String query = m.group(PARSE_URL_QUERY); - String frag = m.group(PARSE_URL_FRAGMENT); - System.out.printf("auth: %s %s%n", authority,val.isValidAuthority(authority)); - System.out.printf("path: %s %s%n", path,val.isValidPath(path)); - System.out.printf("query: %s %s%n", query,val.isValidQuery(query)); - System.out.printf("frag: %s %s%n", frag,val.isValidFragment(frag)); - System.out.printf("valid: %s %s%n", arg,val.isValid(arg)); - System.out.println(); - } else { - System.out.printf("%s is not valid%n",arg); - } - } } diff --git a/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java b/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java index c67926d..8cbda0d 100644 --- a/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java +++ b/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java @@ -254,7 +254,7 @@ public class UrlValidatorTest extends TestCase { assertTrue("file:///c:/ should now be allowed", validator.isValid("file:///C:/some.file")); - assertTrue("file:///c:\\ should be allowed", + assertFalse("file:///c:\\ should not be allowed", // Only allow forward slashes validator.isValid("file:///C:\\some.file")); assertTrue("file:///etc/ should now be allowed", @@ -333,9 +333,7 @@ public class UrlValidatorTest extends TestCase { public void testValidator464() { String[] schemes = {"file"}; UrlValidator urlValidator = new UrlValidator(schemes); - String fileOK = "file:///bad ^ domain.com/label/test"; // TODO should '^' be allowed unescaped? String fileNAK = "file://bad ^ domain.com/label/test"; - assertTrue(fileOK, urlValidator.isValid(fileOK)); assertFalse(fileNAK, urlValidator.isValid(fileNAK)); } @@ -519,6 +517,20 @@ public class UrlValidatorTest extends TestCase { assertTrue(validator.isValid("http://example.com//_test")); // VALIDATOR-429 } + public void testValidator283() { + UrlValidator validator = new UrlValidator(); + assertFalse(validator.isValid("http://finance.yahoo.com/news/Owners-54B-NY-housing-apf-2493139299.html?x=0&ap=%fr")); + assertTrue(validator.isValid("http://finance.yahoo.com/news/Owners-54B-NY-housing-apf-2493139299.html?x=0&ap=%22")); + } + + public void testFragments() { + String[] schemes = {"http","https"}; + UrlValidator urlValidator = new UrlValidator(schemes, UrlValidator.NO_FRAGMENTS); + assertFalse(urlValidator.isValid("http://apache.org/a/b/c#frag")); + urlValidator = new UrlValidator(schemes); + assertTrue(urlValidator.isValid("http://apache.org/a/b/c#frag")); +} + //-------------------- Test data for creating a composite URL /** * The data given below approximates the 4 parts of a URL