Author: ltheussl Date: Mon Apr 13 19:55:04 2009 New Revision: 764589 URL: http://svn.apache.org/viewvc?rev=764589&view=rev Log: [DOXIA-239] Handle non-ASCII characters in anchors and id's
Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/DoxiaUtilsTest.java maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java?rev=764589&r1=764588&r2=764589&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java Mon Apr 13 19:55:04 2009 @@ -257,92 +257,19 @@ /** * Construct a valid id. - * <p> - * According to the <a href="http://www.w3.org/TR/html4/types.html#type-name"> - * HTML 4.01 specification section 6.2 SGML basic types</a>: - * </p> - * <p> - * <i>ID and NAME tokens must begin with a letter ([A-Za-z]) and may be - * followed by any number of letters, digits ([0-9]), hyphens ("-"), - * underscores ("_"), colons (":"), and periods (".").</i> - * </p> * * <p> - * According to <a href="http://www.w3.org/TR/xhtml1/#C_8">XHTML 1.0 - * section C.8. Fragment Identifiers</a>: - * </p> - * <p> - * <i>When defining fragment identifiers to be backward-compatible, only - * strings matching the pattern [A-Za-z][A-Za-z0-9:_.-]* should be used.</i> + * <b>Note</b>: this method is identical to {@link DoxiaUtils#encodeId(String)}, + * the rules to encode an id are laid out there. * </p> * - * <p> - * To achieve this we need to convert the <i>id</i> String. 
Two conversions - * are necessary and one is done to get prettier ids: - * </p> - * <ol> - * <li>If the first character is not a letter, prepend the id with the - * letter 'a'</li> - * <li>A space is replaced with an underscore '_'</li> - * <li>Remove whitespace at the start and end before starting to process</li> - * </ol> - * - * <p> - * For letters, the case is preserved in the conversion. - * </p> - * - * <p> - * Here are some examples: - * </p> - * <pre> - * HtmlTools.encodeId( null ) = null - * HtmlTools.encodeId( "" ) = "" - * HtmlTools.encodeId( " _ " ) = "a_" - * HtmlTools.encodeId( "1" ) = "a1" - * HtmlTools.encodeId( "1anchor" ) = "a1anchor" - * HtmlTools.encodeId( "_anchor" ) = "a_anchor" - * HtmlTools.encodeId( "a b-c123 " ) = "a_b-c123" - * HtmlTools.encodeId( " anchor" ) = "anchor" - * HtmlTools.encodeId( "myAnchor" ) = "myAnchor" - * </pre> - * - * <b>Note</b>: this method is intentionally similar to {@link DoxiaUtils#encodeId(String)}. - * * @param id The id to be encoded. * @return The trimmed and encoded id, or null if id is null. + * @see {@link DoxiaUtils#encodeId(java.lang.String)}. */ public static String encodeId( String id ) { - if ( id == null ) - { - return null; - } - - id = id.trim(); - int length = id.length(); - StringBuffer buffer = new StringBuffer( length ); - - for ( int i = 0; i < length; ++i ) - { - char c = id.charAt( i ); - - if ( ( i == 0 ) && ( !Character.isLetter( c ) ) ) - { - buffer.append( "a" ); - } - - if ( c == ' ' ) - { - buffer.append( "_" ); - } - else if ( ( Character.isLetterOrDigit( c ) ) || ( c == '-' ) || ( c == '_' ) || ( c == ':' ) - || ( c == '.' 
) ) - { - buffer.append( c ); - } - } - - return buffer.toString(); + return DoxiaUtils.encodeId( id ); } /** @@ -355,31 +282,7 @@ */ public static boolean isId( String text ) { - if ( text == null || text.length() == 0 ) - { - return false; - } - - for ( int i = 0; i < text.length(); ++i ) - { - char c = text.charAt( i ); - - if ( i == 0 && !Character.isLetter( c ) ) - { - return false; - } - - if ( c == ' ' ) - { - return false; - } - else if ( !Character.isLetterOrDigit( c ) && c != '-' && c != '_' && c != ':' && c != '.' ) - { - return false; - } - } - - return true; + return DoxiaUtils.isValidId( text ); } private HtmlTools() Modified: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/DoxiaUtilsTest.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/DoxiaUtilsTest.java?rev=764589&r1=764588&r2=764589&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/DoxiaUtilsTest.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/DoxiaUtilsTest.java Mon Apr 13 19:55:04 2009 @@ -155,6 +155,8 @@ assertEquals( DoxiaUtils.encodeId( " anchor" ), "anchor" ); assertEquals( DoxiaUtils.encodeId( "myAnchor" ), "myAnchor" ); assertEquals( DoxiaUtils.encodeId( "my&Anchor" ), "my%26Anchor" ); + assertEquals( DoxiaUtils.encodeId( "Håkon" ), "H%c3%a5kon" ); + assertEquals( DoxiaUtils.encodeId( "Theußl" ), "Theu%c3%9fl" ); } /** @@ -178,5 +180,6 @@ assertTrue( DoxiaUtils.isValidId( "a:" ) ); assertTrue( DoxiaUtils.isValidId( "a." 
) ); assertTrue( DoxiaUtils.isValidId( "index.html" ) ); + assertFalse( DoxiaUtils.isValidId( "Theußl" ) ); } } Modified: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java?rev=764589&r1=764588&r2=764589&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java Mon Apr 13 19:55:04 2009 @@ -63,6 +63,8 @@ assertEquals( HtmlTools.encodeId( "a b-c123 " ), "a_b-c123" ); assertEquals( HtmlTools.encodeId( " anchor" ), "anchor" ); assertEquals( HtmlTools.encodeId( "myAnchor" ), "myAnchor" ); + assertEquals( HtmlTools.encodeId( "Håkon" ), "H%c3%a5kon" ); + assertEquals( HtmlTools.encodeId( "Theußl" ), "Theu%c3%9fl" ); } /** @@ -76,10 +78,9 @@ "http://www.example.com/?This is a simple test." ), "http://www.example.com/?This%20is%20a%20simple%20test." ); - // TODO: the & is not encoded? - //assertEquals( HtmlTools.encodeURL( - // "http://www.example.com/?This is a simple & short test." ), - // "http://www.example.com/?This%20is%20a%20simple%20%26%20short%20test." ); + assertEquals( HtmlTools.encodeURL( + "http://www.example.com/?This is a simple & short test." ), + "http://www.example.com/?This%20is%20a%20simple%20&%20short%20test." ); } /** @@ -101,6 +102,7 @@ assertTrue( HtmlTools.isId( "a-" ) ); assertTrue( HtmlTools.isId( "a:" ) ); assertTrue( HtmlTools.isId( "a." ) ); + assertFalse( HtmlTools.isId( "Theußl" ) ); } /**