Author: sebb Date: Tue Jan 6 11:26:31 2015 New Revision: 1649783 URL: http://svn.apache.org/r1649783 Log: VALIDATOR-235 UrlValidator rejects url with Unicode characters in domain label or TLD. Check total length after conversion - URLs must be equally reachable in punycode and Unicode.
Modified: commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java Modified: commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java URL: http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java?rev=1649783&r1=1649782&r2=1649783&view=diff ============================================================================== --- commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java (original) +++ commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java Tue Jan 6 11:26:31 2015 @@ -145,10 +145,17 @@ public class DomainValidator implements * @return true if the parameter is a valid domain name */ public boolean isValid(String domain) { - if (domain == null || domain.length() > 253) { + if (domain == null) { + return false; + } + domain = unicodeToASCII(domain); + // hosts must be equally reachable via punycode and Unicode; + // Unicode is never shorter than punycode, so check punycode + // if domain did not convert, then it will be caught by ASCII + // checks in the regexes below + if (domain.length() > 253) { return false; } - domain = unicodeToASCII(domain); // TODO should this be before the length check? 
String[] groups = domainRegex.match(domain); if (groups != null && groups.length > 0) { return isValidTld(groups[0]); @@ -159,10 +166,17 @@ public class DomainValidator implements // package protected for unit test access // must agree with isValid() above final boolean isValidDomainSyntax(String domain) { - if (domain == null || domain.length() > 253) { + if (domain == null) { + return false; + } + domain = unicodeToASCII(domain); + // hosts must be equally reachable via punycode and Unicode; + // Unicode is never shorter than punycode, so check punycode + // if domain did not convert, then it will be caught by ASCII + // checks in the regexes below + if (domain.length() > 253) { return false; } - domain = unicodeToASCII(domain); // TODO should this be before the length check? String[] groups = domainRegex.match(domain); return (groups != null && groups.length > 0) || hostnameRegex.isValid(domain);