Author: vsiveton Date: Wed May 20 11:05:40 2009 New Revision: 776659 URL: http://svn.apache.org/viewvc?rev=776659&view=rev Log: o clarified the javadoc for escapeHTML/unescapeHTML and added a new unescapeHTML(String, boolean) overload o handle ' (&apos;) when xmlMode is true o updated the test case o unescapeHTML no longer throws IllegalArgumentException on malformed numeric entities
Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java?rev=776659&r1=776658&r2=776659&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java Wed May 20 11:05:40 2009 @@ -95,6 +95,8 @@ /** * Escape special HTML characters in a String in <code>xml</code> mode. * + * <b>Note</b>: this method doesn't escape non-ascii characters by numeric characters references. + * * @param text the String to escape, may be null. * @return The escaped text or the empty string if text == null. * @see #escapeHTML(String,boolean) @@ -108,10 +110,11 @@ * Escape special HTML characters in a String. * * <pre> - * < becomes <code><</code> - * > becomes <code>></code> - * & becomes <code>&</code> - * " becomes <code>"</code> + * < becomes <code>&lt;</code> + * > becomes <code>&gt;</code> + * & becomes <code>&amp;</code> + * " becomes <code>&quot;</code> + * ' becomes <code>&apos;</code> if xmlMode = true * </pre> * * If <code>xmlMode</code> is true, every other character than the above remains unchanged, @@ -124,9 +127,12 @@ * </pre> * * @param text The String to escape, may be null. - * @param xmlMode set to <code>false</code> to replace non-ascii characters. + * @param xmlMode <code>true</code> to replace also ' to &apos, <code>false</code> to replace non-ascii + * characters by numeric characters references. * @return The escaped text or the empty string if text == null. 
* @since 1.1 + * @see <a href="http://www.w3.org/TR/2000/REC-xml-20001006#sec-predefined-ent">http://www.w3.org/TR/2000/REC-xml-20001006#sec-predefined-ent</a> + * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">http://www.w3.org/TR/html401/charset.html#h-5.3</a> */ public static final String escapeHTML( String text, boolean xmlMode ) { @@ -158,7 +164,14 @@ default: if ( xmlMode ) { - buffer.append( c ); + if ( c == '\'' ) + { + buffer.append( "'" ); + } + else + { + buffer.append( c ); + } } else { @@ -188,6 +201,19 @@ } /** + * Unescapes HTML entities in a string in non xml mode. + * + * @param text the <code>String</code> to unescape, may be null. + * @return a new unescaped <code>String</code>, <code>null</code> if null string input. + * @since 1.1.1. + * @see #unescapeHTML(String, boolean) + */ + public static String unescapeHTML( String text ) + { + return unescapeHTML( text, false ); + } + + /** * Unescapes HTML entities in a string. * * <p> Unescapes a string containing entity escapes to a string @@ -204,18 +230,27 @@ * </pre> * * @param text the <code>String</code> to unescape, may be null. + * @param xmlMode set to <code>true</code> to replace &apos by '. * @return a new unescaped <code>String</code>, <code>null</code> if null string input. * @since 1.1.1. 
*/ - public static String unescapeHTML( String text ) + public static String unescapeHTML( String text, boolean xmlMode ) { if ( text == null ) { return null; } - // StringEscapeUtils.unescapeHtml returns entities it doesn't recognize unchanged - String unescaped = StringEscapeUtils.unescapeHtml( text ); + String unescaped; + if ( xmlMode ) + { + unescaped = StringEscapeUtils.unescapeXml( text ); + } + else + { + // StringEscapeUtils.unescapeHtml returns entities it doesn't recognize unchanged + unescaped = StringEscapeUtils.unescapeHtml( text ); + } if ( !text.equals( unescaped ) ) { @@ -233,21 +268,19 @@ } tmp = tmp.substring( i + 3 ); - if ( tmp.indexOf( ';' ) == -1 ) - { - throw new IllegalArgumentException( "Wrong HTML near '..." + tmp + "'" ); - } - - String entity = tmp.substring( 0, tmp.indexOf( ';' ) ); - try - { - Integer.parseInt( entity, 16 ); - } - catch ( Exception e ) + if ( tmp.indexOf( ';' ) != -1 ) { - throw new IllegalArgumentException( "Wrong HTML near '..." + tmp + "'" ); + String entity = tmp.substring( 0, tmp.indexOf( ';' ) ); + try + { + Integer.parseInt( entity, 16 ); + entities.add( entity ); + } + catch ( NumberFormatException e ) + { + // nop + } } - entities.add( entity ); } for ( int i = 0; i < entities.size(); i++ ) @@ -383,7 +416,7 @@ } // -// Imported code from ASF Harmony project +// Imported code from ASF Harmony project rev 770909 // http://svn.apache.org/repos/asf/harmony/enhanced/classlib/trunk/modules/luni/src/main/java/java/lang/Character.java // Modified: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java?rev=776659&r1=776658&r2=776659&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java (original) +++ 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java Wed May 20 11:05:40 2009 @@ -19,7 +19,6 @@ * under the License. */ -import org.apache.commons.lang.StringEscapeUtils; import org.codehaus.plexus.PlexusTestCase; /** @@ -38,13 +37,19 @@ { assertEquals( HtmlTools.escapeHTML( null ), "" ); assertEquals( HtmlTools.escapeHTML( "" ), "" ); + assertEquals( HtmlTools.escapeHTML( "\u0009" ), "\u0009" ); + assertEquals( HtmlTools.escapeHTML( "\u0001" ), "\u0001" ); + + // Predefined entities assertEquals( HtmlTools.escapeHTML( "<" ), "<" ); assertEquals( HtmlTools.escapeHTML( ">" ), ">" ); assertEquals( HtmlTools.escapeHTML( "&" ), "&" ); assertEquals( HtmlTools.escapeHTML( "\"" ), """ ); - assertEquals( HtmlTools.escapeHTML( "&" ), "&amp;" ); + assertEquals( HtmlTools.escapeHTML( "\'" ), "'" ); + assertEquals( HtmlTools.escapeHTML( "\'", false ), "\'" ); // xml mode + assertEquals( HtmlTools.escapeHTML( "&" ), "&amp;" ); assertEquals( HtmlTools.escapeHTML( "\u00e4", true ), "\u00e4" ); assertEquals( HtmlTools.escapeHTML( "\u00e4", false ), "ä" ); assertEquals( HtmlTools.escapeHTML( "\u0159", false ), "ř" ); @@ -58,26 +63,23 @@ { assertNull( HtmlTools.unescapeHTML( null ) ); assertEquals( "", HtmlTools.unescapeHTML( "" ) ); + assertEquals( "\u0009", HtmlTools.unescapeHTML( "\u0009" ) ); + assertEquals( "\u0001", HtmlTools.unescapeHTML( "\u0001" ) ); assertEquals( "<", HtmlTools.unescapeHTML( "<" ) ); assertEquals( ">", HtmlTools.unescapeHTML( ">" ) ); assertEquals( "&", HtmlTools.unescapeHTML( "&" ) ); assertEquals( "\"", HtmlTools.unescapeHTML( """ ) ); + assertEquals( "'", HtmlTools.unescapeHTML( "'" ) ); + assertEquals( "\'", HtmlTools.unescapeHTML( "'", true ) ); assertEquals( "&", HtmlTools.unescapeHTML( "&amp;" ) ); assertEquals( "<Français>", HtmlTools.unescapeHTML( "&lt;Fran&ccedil;ais&gt;" ) ); assertEquals( "\u0159", HtmlTools.unescapeHTML( "ř" ) ); assertEquals( "\uD808\uDF45", HtmlTools.unescapeHTML( "𒍅" ) ); 
assertEquals( "\uD835\uDFED", HtmlTools.unescapeHTML( "𝟭" ) ); assertEquals( "\uD808\uDF45\uD835\uDFED", HtmlTools.unescapeHTML( "𒍅𝟭" ) ); - - try - { - HtmlTools.unescapeHTML( "test 𝟭 test" ); - assertTrue( false ); - } - catch ( IllegalArgumentException e ) - { - assertTrue( true ); - } + assertEquals( "𝟭 𝟭", HtmlTools.unescapeHTML( "𝟭 𝟭" ) ); + assertEquals( "𝟭 \uD835\uDFED", HtmlTools.unescapeHTML( "𝟭 𝟭" ) ); + assertEquals( "&#xQWER;", HtmlTools.unescapeHTML( "&#xQWER;" ) ); } /**