Author: sebb Date: Tue Jan 6 20:19:21 2015 New Revision: 1649932 URL: http://svn.apache.org/r1649932 Log: VALIDATOR-235 UrlValidator rejects url with Unicode characters in domain label or TLD Allow IDN domains for all schemes
Modified: commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java Modified: commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java URL: http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java?rev=1649932&r1=1649931&r2=1649932&view=diff ============================================================================== --- commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java (original) +++ commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java Tue Jan 6 20:19:21 2015 @@ -100,10 +100,6 @@ public class UrlValidator implements Ser */ public static final long ALLOW_LOCAL_URLS = 1 << 3; - // Drop numeric, and "+-." for now - // TODO does not allow for optional userinfo. Does not enforce initial alphanumeric. - private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\."; - /** * This expression derived/taken from the BNF for URI (RFC2396). */ @@ -134,6 +130,11 @@ public class UrlValidator implements Ser private static final String SCHEME_REGEX = "^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*"; private static final Pattern SCHEME_PATTERN = Pattern.compile(SCHEME_REGEX); + // Drop numeric, and "+-." for now + // TODO does not allow for optional userinfo. + // Validation of character set is done by isValidAuthority + private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\."; + private static final String AUTHORITY_REGEX = "^([" + AUTHORITY_CHARS_REGEX + "]*)(:\\d*)?(.*)?"; // 1 2 3 @@ -144,7 +145,7 @@ public class UrlValidator implements Ser private static final int PARSE_AUTHORITY_PORT = 2; /** - * Should always be empty. + * Should always be empty. The code currently allows spaces. */ private static final int PARSE_AUTHORITY_EXTRA = 3; @@ -154,18 +155,9 @@ public class UrlValidator implements Ser private static final String QUERY_REGEX = "^(.*)$"; private static final Pattern QUERY_PATTERN = Pattern.compile(QUERY_REGEX); - private static final String LEGAL_ASCII_REGEX = "^\\p{ASCII}+$"; - private static final Pattern ASCII_PATTERN = Pattern.compile(LEGAL_ASCII_REGEX); - private static final String PORT_REGEX = "^:(\\d{1,5})$"; private static final Pattern PORT_PATTERN = Pattern.compile(PORT_REGEX); - // Pattern to extract domain for IDN conversion - private static final Pattern HTTP_IDN_PATTERN = Pattern.compile("(https?://)([^/]+)(.*)", Pattern.CASE_INSENSITIVE); - private static final int PARSE_HTTP_IDN_SCHEME = 1; - private static final int PARSE_HTTP_IDN_AUTH = 2; - private static final int PARSE_HTTP_IDN_REST = 3; - /** * Holds the set of current validation options. */ @@ -295,22 +287,6 @@ public class UrlValidator implements Ser return false; } - if (!ASCII_PATTERN.matcher(value).matches()) { - // Non-ASCII input, try and convert HTTP domain - Matcher httpMatcher = HTTP_IDN_PATTERN.matcher(value); - if (httpMatcher.lookingAt()) { // We have an http(s) URL - value = httpMatcher.group(PARSE_HTTP_IDN_SCHEME) - + DomainValidator.unicodeToASCII(httpMatcher.group(PARSE_HTTP_IDN_AUTH)) - + httpMatcher.group(PARSE_HTTP_IDN_REST); - if (!ASCII_PATTERN.matcher(value).matches()) { - return false; - } - // Drop thru, we were able to convert the pattern - } else { - return false; - } - } - // Check the whole url address structure Matcher urlMatcher = URL_PATTERN.matcher(value); if (!urlMatcher.matches()) { @@ -324,11 +300,11 @@ public class UrlValidator implements Ser String authority = urlMatcher.group(PARSE_URL_AUTHORITY); if ("file".equals(scheme) && "".equals(authority)) { - // Special case - file: allows an empty authority + // Special case - file: allows an empty authority } else { - // Validate the authority - if (!isValidAuthority(authority)) { - return false; + // Validate the authority + if (!isValidAuthority(authority)) { + return false; } } @@ -380,7 +356,7 @@ public class UrlValidator implements Ser * If a RegexValidator was supplied and it matches, then the authority is regarded * as valid with no further checks, otherwise the method checks against the * AUTHORITY_PATTERN and the DomainValidator (ALLOW_LOCAL_URLS) - * @param authority Authority value to validate. + * @param authority Authority value to validate, alllows IDN * @return true if authority (hostname and port) is valid. */ protected boolean isValidAuthority(String authority) { @@ -392,8 +368,10 @@ public class UrlValidator implements Ser if (authorityValidator != null && authorityValidator.isValid(authority)) { return true; } + // convert to ASCII if possible + final String authorityASCII = DomainValidator.unicodeToASCII(authority); - Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority); + Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authorityASCII); if (!authorityMatcher.matches()) { return false; } Modified: commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java URL: http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java?rev=1649932&r1=1649931&r2=1649932&view=diff ============================================================================== --- commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java (original) +++ commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java Tue Jan 6 20:19:21 2015 @@ -153,6 +153,8 @@ public class UrlValidatorTest extends Te assertTrue("пÑезиденÑ.ÑÑ should validate", validator.isValid("http://пÑезиденÑ.ÑÑ")); assertTrue("www.b\u00fccher.ch should validate", validator.isValid("http://www.b\u00fccher.ch")); assertFalse("www.\uFFFD.ch FFFD should fail", validator.isValid("http://www.\uFFFD.ch")); + assertTrue("www.b\u00fccher.ch should validate", validator.isValid("ftp://www.b\u00fccher.ch")); + assertFalse("www.\uFFFD.ch FFFD should fail", validator.isValid("ftp://www.\uFFFD.ch")); } public void testValidator248() {