This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-validator.git


The following commit(s) were added to refs/heads/master by this push:
     new f8e4d12  VALIDATOR-283 and VALIDATOR-472
f8e4d12 is described below

commit f8e4d12aba0872b1f82fd26729a4a23b1bb5fc9e
Author: Sebb <s...@apache.org>
AuthorDate: Mon Jul 27 22:52:39 2020 +0100

    VALIDATOR-283 and VALIDATOR-472
---
 src/changes/changes.xml                            |  6 ++
 .../commons/validator/routines/UrlValidator.java   | 71 +++-------------------
 .../validator/routines/UrlValidatorTest.java       | 18 +++++-
 3 files changed, 31 insertions(+), 64 deletions(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 7eba2d1..1326489 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -99,6 +99,12 @@ Apache Commons Validator.
 For the current list of dependencies, please see
 http://commons.apache.org/validator/dependencies.html
   ">
+    <action issue="VALIDATOR-472" type="fix" dev="sebb">
+    UrlValidator should not be more lax than java.net.URI
+    </action>
+    <action issue="VALIDATOR-283" type="fix" dev="sebb" due-to="RC Johnson">
+    URLValidator should check for illegal Hex characters
+    </action>
     <action issue="VALIDATOR-445" type="fix" dev="sebb" due-to="devson">
     Inet6Address may also contain a scope id
     </action>
diff --git 
a/src/main/java/org/apache/commons/validator/routines/UrlValidator.java 
b/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
index 29baa47..ac84762 100644
--- a/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
+++ b/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
@@ -105,30 +105,6 @@ public class UrlValidator implements Serializable {
     public static final long ALLOW_LOCAL_URLS = 1 << 3; // CHECKSTYLE IGNORE 
MagicNumber
 
     /**
-     * This expression derived/taken from the BNF for URI (RFC2396).
-     */
-    private static final String URL_REGEX =
-            "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
-    //        12            3  4          5       6   7        8 9
-    private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX);
-
-    /**
-     * Schema/Protocol (ie. http:, ftp:, file:, etc).
-     */
-    private static final int PARSE_URL_SCHEME = 2;
-
-    /**
-     * Includes hostname/ip and port number.
-     */
-    private static final int PARSE_URL_AUTHORITY = 4;
-
-    private static final int PARSE_URL_PATH = 5;
-
-    private static final int PARSE_URL_QUERY = 7;
-
-    private static final int PARSE_URL_FRAGMENT = 9;
-
-    /**
      * Protocol scheme (e.g. http, ftp, https).
      */
     private static final String SCHEME_REGEX = 
"^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*";
@@ -301,18 +277,20 @@ public class UrlValidator implements Serializable {
             return false;
         }
 
-        // Check the whole url address structure
-        Matcher urlMatcher = URL_PATTERN.matcher(value);
-        if (!urlMatcher.matches()) {
+        URI uri; // ensure value is a valid URI
+        try {
+            uri = new URI(value);
+        } catch (URISyntaxException e) {
             return false;
         }
+        // OK, perfom additional validation
 
-        String scheme = urlMatcher.group(PARSE_URL_SCHEME);
+        String scheme = uri.getScheme();
         if (!isValidScheme(scheme)) {
             return false;
         }
 
-        String authority = urlMatcher.group(PARSE_URL_AUTHORITY);
+        String authority = uri.getRawAuthority();
         if ("file".equals(scheme) && (authority == null || 
"".equals(authority))) {// Special case - file: allows an empty authority
             return true; // this is a local file - nothing more to do here
         } else if ("file".equals(scheme) && authority != null && 
authority.contains(":")) {
@@ -324,15 +302,15 @@ public class UrlValidator implements Serializable {
             }
         }
 
-        if (!isValidPath(urlMatcher.group(PARSE_URL_PATH))) {
+        if (!isValidPath(uri.getRawPath())) {
             return false;
         }
 
-        if (!isValidQuery(urlMatcher.group(PARSE_URL_QUERY))) {
+        if (!isValidQuery(uri.getRawQuery())) {
             return false;
         }
 
-        if (!isValidFragment(urlMatcher.group(PARSE_URL_FRAGMENT))) {
+        if (!isValidFragment(uri.getRawFragment())) {
             return false;
         }
 
@@ -536,17 +514,11 @@ public class UrlValidator implements Serializable {
         return (options & flag) == 0;
     }
 
-    // Unit test access to pattern matcher
-    Matcher matchURL(String value) {
-        return URL_PATTERN.matcher(value);
-    }
-
     /**
      * Validator for checking URL parsing
      * @param args - URLs to validate
      */
     public static void main(String[] args) {
-        UrlValidator val = new UrlValidator(new String[] { "file", "http", 
"https" }, UrlValidator.ALLOW_LOCAL_URLS);
         for(String arg: args) {
             try {
                 URI uri = new URI(arg);
@@ -567,29 +539,6 @@ public class UrlValidator implements Serializable {
             } catch (URISyntaxException e) {
                 System.out.println(e.getMessage());
             }
-           Matcher m = val.matchURL(arg);
-           if (m.matches()) {
-              System.out.printf("%s has %d parts%n",arg,m.groupCount());
-              for(int i=1;i <m.groupCount(); i++) {
-                 String grp = m.group(i);
-                 if (grp != null) {
-                    System.out.printf("%d: %s%n",i, grp);
-                 }
-              }
-              String authority = m.group(PARSE_URL_AUTHORITY);
-              String path = m.group(PARSE_URL_PATH);
-              String query = m.group(PARSE_URL_QUERY);
-              String frag = m.group(PARSE_URL_FRAGMENT);
-              System.out.printf("auth: %s %s%n", 
authority,val.isValidAuthority(authority));
-              System.out.printf("path: %s %s%n", path,val.isValidPath(path));
-              System.out.printf("query: %s %s%n", 
query,val.isValidQuery(query));
-              System.out.printf("frag: %s %s%n", 
frag,val.isValidFragment(frag));
-              System.out.printf("valid: %s %s%n", arg,val.isValid(arg));
-              System.out.println();
-           } else {
-              System.out.printf("%s is not valid%n",arg);
-           }
-  
         }
      }   
   
diff --git 
a/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java 
b/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
index c67926d..8cbda0d 100644
--- a/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
+++ b/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
@@ -254,7 +254,7 @@ public class UrlValidatorTest extends TestCase {
         assertTrue("file:///c:/ should now be allowed",
                  validator.isValid("file:///C:/some.file"));
 
-        assertTrue("file:///c:\\ should be allowed",
+        assertFalse("file:///c:\\ should not be allowed", // Only allow 
forward slashes
               validator.isValid("file:///C:\\some.file"));
 
         assertTrue("file:///etc/ should now be allowed",
@@ -333,9 +333,7 @@ public class UrlValidatorTest extends TestCase {
     public void testValidator464() {
         String[] schemes = {"file"};
         UrlValidator urlValidator = new UrlValidator(schemes);
-        String fileOK = "file:///bad ^ domain.com/label/test"; // TODO should 
'^' be allowed unescaped?
         String fileNAK = "file://bad ^ domain.com/label/test";
-        assertTrue(fileOK, urlValidator.isValid(fileOK));
         assertFalse(fileNAK, urlValidator.isValid(fileNAK));
     }
 
@@ -519,6 +517,20 @@ public class UrlValidatorTest extends TestCase {
       assertTrue(validator.isValid("http://example.com//_test";)); // 
VALIDATOR-429
   }
 
+  public void testValidator283() {
+   UrlValidator validator = new UrlValidator();
+   
assertFalse(validator.isValid("http://finance.yahoo.com/news/Owners-54B-NY-housing-apf-2493139299.html?x=0&ap=%fr";));
+   
assertTrue(validator.isValid("http://finance.yahoo.com/news/Owners-54B-NY-housing-apf-2493139299.html?x=0&ap=%22";));
+  }
+
+  public void testFragments() {
+   String[] schemes = {"http","https"};
+   UrlValidator urlValidator = new UrlValidator(schemes, 
UrlValidator.NO_FRAGMENTS);
+   assertFalse(urlValidator.isValid("http://apache.org/a/b/c#frag";));
+   urlValidator = new UrlValidator(schemes);
+   assertTrue(urlValidator.isValid("http://apache.org/a/b/c#frag";));
+}
+
   //-------------------- Test data for creating a composite URL
    /**
     * The data given below approximates the 4 parts of a URL

Reply via email to