Author: sebb
Date: Tue Jan  6 02:37:09 2015
New Revision: 1649721

URL: http://svn.apache.org/r1649721
Log:
VALIDATOR-235 UrlValidator rejects url with Unicode characters in domain label 
or TLD
Add URL test and fix up domain so it works

Modified:
    
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java
    
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
    
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java

Modified: 
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java
URL: 
http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java?rev=1649721&r1=1649720&r2=1649721&view=diff
==============================================================================
--- 
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java
 (original)
+++ 
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/DomainValidator.java
 Tue Jan  6 02:37:09 2015
@@ -1070,7 +1070,8 @@ public class DomainValidator implements
      * @param input the string to convert, not null
      * @return converted input, or original input if conversion fails
      */
-    private static String unicodeToASCII(String input) {
+    // Needed by UrlValidator
+    static String unicodeToASCII(String input) {
         try {
             return /* java.net.IDN. */ toASCII(input);
         } catch (IllegalArgumentException e) { // input is not valid

Modified: 
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
URL: 
http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java?rev=1649721&r1=1649720&r2=1649721&view=diff
==============================================================================
--- 
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
 (original)
+++ 
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
 Tue Jan  6 02:37:09 2015
@@ -160,6 +160,12 @@ public class UrlValidator implements Ser
     private static final String PORT_REGEX = "^:(\\d{1,5})$";
     private static final Pattern PORT_PATTERN = Pattern.compile(PORT_REGEX);
 
+    // Pattern to extract domain for IDN conversion
+    private static final Pattern HTTP_IDN_PATTERN = Pattern.compile("(https?://)([^/]+)(.*)", Pattern.CASE_INSENSITIVE);
+    private static final int PARSE_HTTP_IDN_SCHEME = 1;
+    private static final int PARSE_HTTP_IDN_AUTH = 2;
+    private static final int PARSE_HTTP_IDN_REST = 3;
+
     /**
      * Holds the set of current validation options.
      */
@@ -290,7 +296,19 @@ public class UrlValidator implements Ser
         }
 
         if (!ASCII_PATTERN.matcher(value).matches()) {
-            return false;
+            // Non-ASCII input, try and convert HTTP domain
+            Matcher httpMatcher = HTTP_IDN_PATTERN.matcher(value);
+            if (httpMatcher.lookingAt()) { // We have an http(s) URL
+                value =   httpMatcher.group(PARSE_HTTP_IDN_SCHEME)
+                        + DomainValidator.unicodeToASCII(httpMatcher.group(PARSE_HTTP_IDN_AUTH))
+                        + httpMatcher.group(PARSE_HTTP_IDN_REST);
+                if (!ASCII_PATTERN.matcher(value).matches()) {
+                    return false;
+                }
+                // Drop thru, we were able to convert the pattern
+            } else {
+                return false;
+            }
         }
 
         // Check the whole url address structure

Modified: 
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java?rev=1649721&r1=1649720&r2=1649721&view=diff
==============================================================================
--- 
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
 (original)
+++ 
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
 Tue Jan  6 02:37:09 2015
@@ -142,6 +142,19 @@ public class UrlValidatorTest extends Te
                validator.isValid("http://somewhere.com/pathxyz/file(1).html"));
    }
 
+   public void testValidator235() {
+       String version = System.getProperty("java.version");
+       if (version.compareTo("1.6") < 0) {
+           System.out.println("Cannot run Unicode IDN tests");
+           return; // Cannot run the test
+       }
+       UrlValidator validator = new UrlValidator();
+       assertTrue("xn--d1abbgf6aiiy.xn--p1ai should validate", validator.isValid("http://xn--d1abbgf6aiiy.xn--p1ai"));
+       assertTrue("президент.рф should validate", validator.isValid("http://президент.рф"));
+       assertTrue("www.b\u00fccher.ch should validate", validator.isValid("http://www.b\u00fccher.ch"));
+       assertFalse("www.\uFFFD.ch FFFD should fail", validator.isValid("http://www.\uFFFD.ch"));
+   }
+
     public void testValidator248() {
         RegexValidator regex = new RegexValidator(new String[] {"localhost", 
".*\\.my-testing"});
         UrlValidator validator = new UrlValidator(regex, 0);


Reply via email to