Author: sebb
Date: Tue Jan  6 20:19:21 2015
New Revision: 1649932

URL: http://svn.apache.org/r1649932
Log:
VALIDATOR-235 UrlValidator rejects url with Unicode characters in domain label 
or TLD
Allow IDN domains for all schemes

Modified:
    
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
    
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java

Modified: 
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
URL: 
http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java?rev=1649932&r1=1649931&r2=1649932&view=diff
==============================================================================
--- 
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
 (original)
+++ 
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
 Tue Jan  6 20:19:21 2015
@@ -100,10 +100,6 @@ public class UrlValidator implements Ser
      */
     public static final long ALLOW_LOCAL_URLS = 1 << 3;
 
-    // Drop numeric, and  "+-." for now
-    // TODO does not allow for optional userinfo. Does not enforce initial 
alphanumeric.
-    private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\.";
-
     /**
      * This expression derived/taken from the BNF for URI (RFC2396).
      */
@@ -134,6 +130,11 @@ public class UrlValidator implements Ser
     private static final String SCHEME_REGEX = 
"^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*";
     private static final Pattern SCHEME_PATTERN = 
Pattern.compile(SCHEME_REGEX);
 
+    // Drop numeric, and  "+-." for now
+    // TODO does not allow for optional userinfo. 
+    // Validation of character set is done by isValidAuthority
+    private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\.";
+
     private static final String AUTHORITY_REGEX =
             "^([" + AUTHORITY_CHARS_REGEX + "]*)(:\\d*)?(.*)?";
     //        1                                 2       3
@@ -144,7 +145,7 @@ public class UrlValidator implements Ser
     private static final int PARSE_AUTHORITY_PORT = 2;
 
     /**
-     * Should always be empty.
+     * Should always be empty. The code currently allows spaces.
      */
     private static final int PARSE_AUTHORITY_EXTRA = 3;
 
@@ -154,18 +155,9 @@ public class UrlValidator implements Ser
     private static final String QUERY_REGEX = "^(.*)$";
     private static final Pattern QUERY_PATTERN = Pattern.compile(QUERY_REGEX);
 
-    private static final String LEGAL_ASCII_REGEX = "^\\p{ASCII}+$";
-    private static final Pattern ASCII_PATTERN = 
Pattern.compile(LEGAL_ASCII_REGEX);
-
     private static final String PORT_REGEX = "^:(\\d{1,5})$";
     private static final Pattern PORT_PATTERN = Pattern.compile(PORT_REGEX);
 
-    // Pattern to extract domain for IDN conversion
-    private static final Pattern HTTP_IDN_PATTERN = 
Pattern.compile("(https?://)([^/]+)(.*)", Pattern.CASE_INSENSITIVE);
-    private static final int PARSE_HTTP_IDN_SCHEME = 1;
-    private static final int PARSE_HTTP_IDN_AUTH = 2;
-    private static final int PARSE_HTTP_IDN_REST = 3;
-
     /**
      * Holds the set of current validation options.
      */
@@ -295,22 +287,6 @@ public class UrlValidator implements Ser
             return false;
         }
 
-        if (!ASCII_PATTERN.matcher(value).matches()) {
-            // Non-ASCII input, try and convert HTTP domain
-            Matcher httpMatcher = HTTP_IDN_PATTERN.matcher(value);
-            if (httpMatcher.lookingAt()) { // We have an http(s) URL
-                value =   httpMatcher.group(PARSE_HTTP_IDN_SCHEME)
-                        + 
DomainValidator.unicodeToASCII(httpMatcher.group(PARSE_HTTP_IDN_AUTH)) 
-                        + httpMatcher.group(PARSE_HTTP_IDN_REST);
-                if (!ASCII_PATTERN.matcher(value).matches()) {
-                    return false;
-                }
-                // Drop thru, we were able to convert the pattern
-            } else {
-                return false;
-            }
-        }
-
         // Check the whole url address structure
         Matcher urlMatcher = URL_PATTERN.matcher(value);
         if (!urlMatcher.matches()) {
@@ -324,11 +300,11 @@ public class UrlValidator implements Ser
 
         String authority = urlMatcher.group(PARSE_URL_AUTHORITY);
         if ("file".equals(scheme) && "".equals(authority)) {
-           // Special case - file: allows an empty authority
+            // Special case - file: allows an empty authority
         } else {
-           // Validate the authority
-           if (!isValidAuthority(authority)) {
-               return false;
+            // Validate the authority
+            if (!isValidAuthority(authority)) {
+                return false;
             }
         }
 
@@ -380,7 +356,7 @@ public class UrlValidator implements Ser
      * If a RegexValidator was supplied and it matches, then the authority is 
regarded
      * as valid with no further checks, otherwise the method checks against the
      * AUTHORITY_PATTERN and the DomainValidator (ALLOW_LOCAL_URLS)
-     * @param authority Authority value to validate.
+     * @param authority Authority value to validate, allows IDN
      * @return true if authority (hostname and port) is valid.
      */
     protected boolean isValidAuthority(String authority) {
@@ -392,8 +368,10 @@ public class UrlValidator implements Ser
         if (authorityValidator != null && 
authorityValidator.isValid(authority)) {
             return true;
         }
+        // convert to ASCII if possible
+        final String authorityASCII = 
DomainValidator.unicodeToASCII(authority);
 
-        Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority);
+        Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authorityASCII);
         if (!authorityMatcher.matches()) {
             return false;
         }

Modified: 
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java?rev=1649932&r1=1649931&r2=1649932&view=diff
==============================================================================
--- 
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
 (original)
+++ 
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
 Tue Jan  6 20:19:21 2015
@@ -153,6 +153,8 @@ public class UrlValidatorTest extends Te
       assertTrue("президент.рф should validate", 
validator.isValid("http://президент.рф"));
       assertTrue("www.b\u00fccher.ch should validate", 
validator.isValid("http://www.b\u00fccher.ch"));
       assertFalse("www.\uFFFD.ch FFFD should fail", 
validator.isValid("http://www.\uFFFD.ch"));
+       assertTrue("www.b\u00fccher.ch should validate", 
validator.isValid("ftp://www.b\u00fccher.ch"));
+       assertFalse("www.\uFFFD.ch FFFD should fail", 
validator.isValid("ftp://www.\uFFFD.ch"));
    }
 
     public void testValidator248() {


Reply via email to