Author: sebb Date: Tue Jan 6 02:37:09 2015 New Revision: 1649721 URL: http://svn.apache.org/r1649721 Log: VALIDATOR-235 UrlValidator rejects url with Unicode characters in domain label or TLD Add URL test and fix up domain so it works
Modified: commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java Modified: commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java URL: http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java?rev=1649721&r1=1649720&r2=1649721&view=diff ============================================================================== --- commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java (original) +++ commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java Tue Jan 6 02:37:09 2015 @@ -1070,7 +1070,8 @@ public class DomainValidator implements * @param input the string to convert, not null * @return converted input, or original input if conversion fails */ - private static String unicodeToASCII(String input) { + // Needed by UrlValidator + static String unicodeToASCII(String input) { try { return /* java.net.IDN. 
*/ toASCII(input); } catch (IllegalArgumentException e) { // input is not valid Modified: commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java URL: http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java?rev=1649721&r1=1649720&r2=1649721&view=diff ============================================================================== --- commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java (original) +++ commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java Tue Jan 6 02:37:09 2015 @@ -160,6 +160,12 @@ public class UrlValidator implements Ser private static final String PORT_REGEX = "^:(\\d{1,5})$"; private static final Pattern PORT_PATTERN = Pattern.compile(PORT_REGEX); + // Pattern to extract domain for IDN conversion + private static final Pattern HTTP_IDN_PATTERN = Pattern.compile("(https?://)([^/]+)(.*)", Pattern.CASE_INSENSITIVE); + private static final int PARSE_HTTP_IDN_SCHEME = 1; + private static final int PARSE_HTTP_IDN_AUTH = 2; + private static final int PARSE_HTTP_IDN_REST = 3; + /** * Holds the set of current validation options. 
*/ @@ -290,7 +296,19 @@ public class UrlValidator implements Ser } if (!ASCII_PATTERN.matcher(value).matches()) { - return false; + // Non-ASCII input, try and convert HTTP domain + Matcher httpMatcher = HTTP_IDN_PATTERN.matcher(value); + if (httpMatcher.lookingAt()) { // We have an http(s) URL + value = httpMatcher.group(PARSE_HTTP_IDN_SCHEME) + + DomainValidator.unicodeToASCII(httpMatcher.group(PARSE_HTTP_IDN_AUTH)) + + httpMatcher.group(PARSE_HTTP_IDN_REST); + if (!ASCII_PATTERN.matcher(value).matches()) { + return false; + } + // Drop thru, we were able to convert the pattern + } else { + return false; + } } // Check the whole url address structure Modified: commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java URL: http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java?rev=1649721&r1=1649720&r2=1649721&view=diff ============================================================================== --- commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java (original) +++ commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java Tue Jan 6 02:37:09 2015 @@ -142,6 +142,19 @@ public class UrlValidatorTest extends Te validator.isValid("http://somewhere.com/pathxyz/file(1).html")); } + public void testValidator235() { + String version = System.getProperty("java.version"); + if (version.compareTo("1.6") < 0) { + System.out.println("Cannot run Unicode IDN tests"); + return; // Cannot run the test + } + UrlValidator validator = new UrlValidator(); + assertTrue("xn--d1abbgf6aiiy.xn--p1ai should validate", validator.isValid("http://xn--d1abbgf6aiiy.xn--p1ai")); + assertTrue("президент.рф should validate", validator.isValid("http://президент.рф")); + assertTrue("www.b\u00fccher.ch should validate", validator.isValid("http://www.b\u00fccher.ch")); +
assertFalse("www.\uFFFD.ch FFFD should fail", validator.isValid("http://www.\uFFFD.ch")); + } + public void testValidator248() { RegexValidator regex = new RegexValidator(new String[] {"localhost", ".*\\.my-testing"}); UrlValidator validator = new UrlValidator(regex, 0);