Author: ltheussl Date: Tue Nov 2 13:33:03 2010 New Revision: 1030039 URL: http://svn.apache.org/viewvc?rev=1030039&view=rev Log: [DOXIA-410] Link checker not handling anchors very well Submitted by: James Strachan Patch applied with minor modifications to allow for brackets in regexps, as needed for links to Javadoc methods
Added: maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/resources/anchorTest/ maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/resources/anchorTest/testAnchor.html Modified: maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java Added: maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java?rev=1030039&view=auto ============================================================================== --- maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java (added) +++ maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/Anchors.java Tue Nov 2 13:33:03 2010 @@ -0,0 +1,59 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.maven.doxia.linkcheck.validation; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * A helper class to test if some content matches the given HTML anchor + */ +public class Anchors +{ + /** + * Returns true if the given anchor can be found in the content markup. + * + * @param content the content string. + * @param anchor the anchor to match. + * + * @return true if the given anchor can be found in the content markup. + */ + public static boolean matchesAnchor( String content, String anchor ) + { + if ( content != null && anchor.length() > 0 ) { + // can use name or id attributes and also can use single or double quotes with whitespace around the = + String regex = "(name|id)\\s*=\\s*('|\")" + escapeBrackets( anchor ) + "('|\")"; + Pattern pattern = Pattern.compile( regex ); + Matcher matcher = pattern.matcher( content ); + return matcher.find(); + } + return false; + } + + // for javadoc links, see DOXIA-410 + private static String escapeBrackets( String content ) + { + final String escaped = content.replace( "(", "\\(" ).replace( ")", "\\)" ); + return escaped.replace( "[", "\\[" ).replace( "]", "\\]" ); + } + + private Anchors() + { + // utility class + } +} Modified: maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java?rev=1030039&r1=1030038&r2=1030039&view=diff 
============================================================================== --- maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java (original) +++ maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/FileLinkValidator.java Tue Nov 2 13:33:03 2010 @@ -108,7 +108,7 @@ public final class FileLinkValidator { // the anchor exists? String content = read( lvi.getSource(), encoding ); - if ( content != null && content.indexOf( "name=\"" + anchor + "\"" ) != -1 ) + if ( Anchors.matchesAnchor( content, anchor ) ) { return lvi.getSource(); } @@ -119,7 +119,7 @@ public final class FileLinkValidator // the anchor exists? String content = read( new File( lvi.getSource().getParentFile(), link ), encoding ); - if ( content != null && content.indexOf( "name=\"" + anchor + "\"" ) != -1 ) + if ( Anchors.matchesAnchor( content, anchor ) ) { return new File( lvi.getSource().getParentFile(), link ); } Modified: maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java?rev=1030039&r1=1030038&r2=1030039&view=diff ============================================================================== --- maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java (original) +++ maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/main/java/org/apache/maven/doxia/linkcheck/validation/OnlineHTTPLinkValidator.java Tue Nov 2 13:33:03 2010 @@ -157,6 +157,14 @@ public final class OnlineHTTPLinkValidat this.cl.getParams().setParameter( HttpMethodParams.USER_AGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" ); String link = lvi.getLink(); + String anchor = ""; + 
int idx = link.indexOf( '#' ); + if ( idx != -1 ) + { + anchor = link.substring( idx + 1 ); + link = link.substring( 0, idx ); + } + try { if ( link.startsWith( "/" ) ) @@ -200,6 +208,17 @@ public final class OnlineHTTPLinkValidat if ( hm.getStatusCode() == HttpStatus.SC_OK ) { + // lets check if the anchor is present + if ( anchor.length() > 0 ) + { + String content = hm.getResponseBodyAsString(); + + if ( !Anchors.matchesAnchor( content, anchor ) ) + { + return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, false, + "Missing anchor '" + anchor + "'" ); + } + } return new HTTPLinkValidationResult( LinkcheckFileResult.VALID_LEVEL, true, hm.getStatusCode(), hm.getStatusText() ); } Added: maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java?rev=1030039&view=auto ============================================================================== --- maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java (added) +++ maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/AnchorLinkTest.java Tue Nov 2 13:33:03 2010 @@ -0,0 +1,98 @@ +package org.apache.maven.doxia.linkcheck; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.apache.maven.doxia.linkcheck.model.LinkcheckFile; +import org.apache.maven.doxia.linkcheck.model.LinkcheckModel; +import org.codehaus.plexus.PlexusTestCase; + +import java.io.File; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +/** + * @author Ben Walding + * @author <a href="mailto:car...@apache.org">Carlos Sanchez</a> + * @version $Id: LinkCheckTest.java 800044 2009-08-02 12:28:50Z vsiveton $ + */ +public class AnchorLinkTest + extends PlexusTestCase +{ + /** + * @throws Exception + */ + public void testScan() + throws Exception + { + LinkCheck lc = (LinkCheck) lookup( LinkCheck.ROLE ); + assertNotNull( lc ); + + lc.setOnline( true ); // TODO: check if online + + lc.setBasedir( new File( getBasedir(), "src/test/resources/anchorTest" ) ); // TODO + + lc.setReportOutput( new File( getBasedir(), "target/linkcheck/anchorTest/linkcheck.xml" ) ); + + lc.setReportOutputEncoding( "UTF-8" ); + + lc.setLinkCheckCache( new File( getBasedir(), "target/linkcheck/anchorTest/linkcheck.cache" ) ); // TODO + + String[] excludes = new String[] + { + "http://cvs.apache.org/viewcvs.cgi/maven-pluginszz/", + "http://cvs.apache.org/viewcvs.cgi/mavenzz/" + }; + + lc.setExcludedLinks( excludes ); + + LinkcheckModel result = lc.execute(); + + Iterator iter = result.getFiles().iterator(); + + Map map = new HashMap(); + + while ( iter.hasNext() ) + { + LinkcheckFile ftc = (LinkcheckFile) iter.next(); + map.put( ftc.getRelativePath(), ftc ); + } + + assertEquals( "files.size()", 1, result.getFiles().size() ); + + 
LinkcheckFile ftc = check( map, "testAnchor.html", 1 ); + + //System.out.println("anchor test " + ftc.getResults()); + + assertEquals( "Should have matched!", 1, ftc.getSuccessful() ); + assertEquals( "Should have no failures!", 0, ftc.getUnsuccessful() ); + } + + private LinkcheckFile check( Map map, String name, int linkCount ) + { + LinkcheckFile ftc = (LinkcheckFile) map.get( name ); + + assertNotNull( name + " = null!", ftc ); + + assertEquals( name + ".getResults().size()", linkCount, ftc.getResults().size() ); + + return ftc; + } +} Modified: maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java?rev=1030039&r1=1030038&r2=1030039&view=diff ============================================================================== --- maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java (original) +++ maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/LinkCheckTest.java Tue Nov 2 13:33:03 2010 @@ -73,7 +73,7 @@ public class LinkCheckTest map.put( ftc.getRelativePath(), ftc ); } - assertEquals( "files.size()", 9, result.getFiles().size() ); + assertEquals( "files.size()", 10, result.getFiles().size() ); check( map, "nolink.html", 0 ); check( map, "test-resources/nolink.html", 0 ); @@ -81,6 +81,7 @@ public class LinkCheckTest check( map, "test-resources/test1/test2.html", 0 ); check( map, "test1/test1.html", 1 ); check( map, "testA.html", 3 ); + check( map, "anchorTest/testAnchor.html", 1 ); check( map, "linkincomment.html", 1 ); /* test excludes */ Added: maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java?rev=1030039&view=auto ============================================================================== --- maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java (added) +++ maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/java/org/apache/maven/doxia/linkcheck/validation/AnchorsTest.java Tue Nov 2 13:33:03 2010 @@ -0,0 +1,52 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.maven.doxia.linkcheck.validation; + +import junit.framework.TestCase; + +public class AnchorsTest extends TestCase +{ + + public void testAnchorMatching() + { + assertAnchorMatches( "hello <h1 id='foo'>Foo</h1> there", "foo", true ); + assertAnchorMatches( "hello <h1 id = 'foo'>Foo</h1> there", "foo", true ); + assertAnchorMatches( "hello <h1 id=\"foo\">Foo</h1> there", "foo", true ); + assertAnchorMatches( "hello <h1 id='foo2'>Foo</h1> there", "foo", false ); + final String apiAnchor = "assertEqualArrays(java.lang.Object[], java.lang.Object[])"; + assertAnchorMatches( "hello <h1 id='" + apiAnchor + "'>Foo</h1> there", apiAnchor, true ); + + assertAnchorMatches( "<html>\n" + + "<body>\n" + + "\n" + + "<h1 id='foo'>Foo</h1>\n" + + "<p>Some text</p>\n" + + "\n" + + "<h2>Something</h2>\n" + + "<p>Lets try using a link: <a href=\"testAnchor.html#foo\">FooLink</a></p>\n" + + "\n" + + "</body>\n" + + "</html>", "foo", true ); + } + + protected void assertAnchorMatches( String content, String anchor, boolean expected ) + { + boolean actual = Anchors.matchesAnchor( content, anchor ); + assertEquals( "anchor: " + anchor + " in: " + content, expected, actual ); + } +} Added: maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/resources/anchorTest/testAnchor.html URL: http://svn.apache.org/viewvc/maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/resources/anchorTest/testAnchor.html?rev=1030039&view=auto ============================================================================== --- maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/resources/anchorTest/testAnchor.html (added) +++ maven/doxia/doxia-tools/trunk/doxia-linkcheck/src/test/resources/anchorTest/testAnchor.html Tue Nov 2 13:33:03 2010 @@ -0,0 +1,11 @@ +<html> +<body> + +<h1 id='foo'>Foo</h1> +<p>Some text</p> + +<h2>Something</h2> +<p>Lets try using a link: <a href="testAnchor.html#foo">FooLink</a></p> + +</body> +</html> \ No newline at end of file