Author: vsiveton
Date: Wed May 20 11:05:40 2009
New Revision: 776659

URL: http://svn.apache.org/viewvc?rev=776659&view=rev
Log:
o clarified javadoc for escapeHTML/unescapeHTML and added a new unescapeHTML( String, boolean ) overload
o escape ' as &apos; (and unescape it) when xmlMode is true
o updated test case
o removed the IllegalArgumentException thrown by unescapeHTML on malformed numeric entities

Modified:
    
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
    
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java?rev=776659&r1=776658&r2=776659&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
 Wed May 20 11:05:40 2009
@@ -95,6 +95,8 @@
     /**
      * Escape special HTML characters in a String in <code>xml</code> mode.
      *
+     * <b>Note</b>: this method doesn't escape non-ascii characters by numeric 
characters references.
+     *
      * @param text the String to escape, may be null.
      * @return The escaped text or the empty string if text == null.
      * @see #escapeHTML(String,boolean)
@@ -108,10 +110,11 @@
      * Escape special HTML characters in a String.
      *
      * <pre>
-     * < becomes <code>&lt;</code>
-     * > becomes <code>&gt;</code>
-     * & becomes <code>&amp;</code>
-     * " becomes <code>&quot;</code>
+     * < becomes <code>&#38;lt;</code>
+     * > becomes <code>&#38;gt;</code>
+     * & becomes <code>&#38;amp;</code>
+     * " becomes <code>&#38;quot;</code>
+     * ' becomes <code>&#38;apos;</code> if xmlMode = true
      * </pre>
      *
      * If <code>xmlMode</code> is true, every other character than the above 
remains unchanged,
@@ -124,9 +127,12 @@
      * </pre>
      *
      * @param text The String to escape, may be null.
-     * @param xmlMode set to <code>false</code> to replace non-ascii 
characters.
+     * @param xmlMode <code>true</code> to replace also ' to &#38;apos, 
<code>false</code> to replace non-ascii
+     * characters by numeric characters references.
      * @return The escaped text or the empty string if text == null.
      * @since 1.1
+     * @see <a 
href="http://www.w3.org/TR/2000/REC-xml-20001006#sec-predefined-ent">http://www.w3.org/TR/2000/REC-xml-20001006#sec-predefined-ent</a>
+     * @see <a 
href="http://www.w3.org/TR/html401/charset.html#h-5.3">http://www.w3.org/TR/html401/charset.html#h-5.3</a>
      */
     public static final String escapeHTML( String text, boolean xmlMode )
     {
@@ -158,7 +164,14 @@
                 default:
                     if ( xmlMode )
                     {
-                        buffer.append( c );
+                        if ( c == '\'' )
+                        {
+                            buffer.append( "&apos;" );
+                        }
+                        else
+                        {
+                            buffer.append( c );
+                        }
                     }
                     else
                     {
@@ -188,6 +201,19 @@
     }
 
     /**
+     * Unescapes HTML entities in a string in non xml mode.
+     *
+     * @param text the <code>String</code> to unescape, may be null.
+     * @return a new unescaped <code>String</code>, <code>null</code> if null 
string input.
+     * @since 1.1.1.
+     * @see #unescapeHTML(String, boolean)
+     */
+    public static String unescapeHTML( String text )
+    {
+        return unescapeHTML( text, false );
+    }
+
+    /**
      * Unescapes HTML entities in a string.
      *
      * <p> Unescapes a string containing entity escapes to a string
@@ -204,18 +230,27 @@
      * </pre>
      *
      * @param text the <code>String</code> to unescape, may be null.
+     * @param xmlMode set to <code>true</code> to replace &#38;apos by '.
      * @return a new unescaped <code>String</code>, <code>null</code> if null 
string input.
      * @since 1.1.1.
      */
-    public static String unescapeHTML( String text )
+    public static String unescapeHTML( String text, boolean xmlMode )
     {
         if ( text == null )
         {
             return null;
         }
 
-        // StringEscapeUtils.unescapeHtml returns entities it doesn't 
recognize unchanged
-        String unescaped = StringEscapeUtils.unescapeHtml( text );
+        String unescaped;
+        if ( xmlMode )
+        {
+            unescaped = StringEscapeUtils.unescapeXml( text );
+        }
+        else
+        {
+            // StringEscapeUtils.unescapeHtml returns entities it doesn't 
recognize unchanged
+            unescaped = StringEscapeUtils.unescapeHtml( text );
+        }
 
         if ( !text.equals( unescaped ) )
         {
@@ -233,21 +268,19 @@
             }
 
             tmp = tmp.substring( i + 3 );
-            if ( tmp.indexOf( ';' ) == -1 )
-            {
-                throw new IllegalArgumentException( "Wrong HTML near '..." + 
tmp + "'" );
-            }
-
-            String entity = tmp.substring( 0, tmp.indexOf( ';' ) );
-            try
-            {
-                Integer.parseInt( entity, 16 );
-            }
-            catch ( Exception e )
+            if ( tmp.indexOf( ';' ) != -1 )
             {
-                throw new IllegalArgumentException( "Wrong HTML near '..." + 
tmp + "'" );
+                String entity = tmp.substring( 0, tmp.indexOf( ';' ) );
+                try
+                {
+                    Integer.parseInt( entity, 16 );
+                    entities.add( entity );
+                }
+                catch ( NumberFormatException e )
+                {
+                    // nop
+                }
             }
-            entities.add( entity );
         }
 
         for ( int i = 0; i < entities.size(); i++ )
@@ -383,7 +416,7 @@
     }
 
 //
-// Imported code from ASF Harmony project
+// Imported code from ASF Harmony project rev 770909
 // 
http://svn.apache.org/repos/asf/harmony/enhanced/classlib/trunk/modules/luni/src/main/java/java/lang/Character.java
 //
 

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java?rev=776659&r1=776658&r2=776659&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
 Wed May 20 11:05:40 2009
@@ -19,7 +19,6 @@
  * under the License.
  */
 
-import org.apache.commons.lang.StringEscapeUtils;
 import org.codehaus.plexus.PlexusTestCase;
 
 /**
@@ -38,13 +37,19 @@
     {
         assertEquals( HtmlTools.escapeHTML( null ), "" );
         assertEquals( HtmlTools.escapeHTML( "" ), "" );
+        assertEquals( HtmlTools.escapeHTML( "\u0009" ), "\u0009" );
+        assertEquals( HtmlTools.escapeHTML( "\u0001" ), "\u0001" );
+
+        // Predefined entities
         assertEquals( HtmlTools.escapeHTML( "<" ), "&lt;" );
         assertEquals( HtmlTools.escapeHTML( ">" ), "&gt;" );
         assertEquals( HtmlTools.escapeHTML( "&" ), "&amp;" );
         assertEquals( HtmlTools.escapeHTML( "\"" ), "&quot;" );
-        assertEquals( HtmlTools.escapeHTML( "&amp;" ), "&amp;amp;" );
+        assertEquals( HtmlTools.escapeHTML( "\'" ), "&apos;" );
+        assertEquals( HtmlTools.escapeHTML( "\'", false ), "\'" );
 
         // xml mode
+        assertEquals( HtmlTools.escapeHTML( "&amp;" ), "&amp;amp;" );
         assertEquals( HtmlTools.escapeHTML( "\u00e4", true ), "\u00e4" );
         assertEquals( HtmlTools.escapeHTML( "\u00e4", false ), "&#xe4;" );
         assertEquals( HtmlTools.escapeHTML( "\u0159", false ), "&#x159;" );
@@ -58,26 +63,23 @@
     {
         assertNull( HtmlTools.unescapeHTML( null ) );
         assertEquals( "", HtmlTools.unescapeHTML( "" ) );
+        assertEquals( "\u0009", HtmlTools.unescapeHTML( "\u0009" ) );
+        assertEquals( "\u0001", HtmlTools.unescapeHTML( "\u0001" ) );
         assertEquals( "<", HtmlTools.unescapeHTML( "&lt;" ) );
         assertEquals( ">", HtmlTools.unescapeHTML( "&gt;" ) );
         assertEquals( "&", HtmlTools.unescapeHTML( "&amp;" ) );
         assertEquals( "\"", HtmlTools.unescapeHTML( "&quot;" ) );
+        assertEquals( "&apos;", HtmlTools.unescapeHTML( "&apos;" ) );
+        assertEquals( "\'", HtmlTools.unescapeHTML( "&apos;", true ) );
         assertEquals( "&amp;", HtmlTools.unescapeHTML( "&amp;amp;" ) );
         assertEquals( "&lt;Fran&ccedil;ais&gt;", HtmlTools.unescapeHTML( 
"&amp;lt;Fran&amp;ccedil;ais&amp;gt;" ) );
         assertEquals( "\u0159", HtmlTools.unescapeHTML( "&#x159;" ) );
         assertEquals( "\uD808\uDF45", HtmlTools.unescapeHTML( "&#x12345;" ) );
         assertEquals( "\uD835\uDFED", HtmlTools.unescapeHTML( "&#x1d7ed;" ) );
         assertEquals( "\uD808\uDF45\uD835\uDFED", HtmlTools.unescapeHTML( 
"&#x12345;&#x1d7ed;" ) );
-
-        try
-        {
-            HtmlTools.unescapeHTML( "test &#x1d7ed test" );
-            assertTrue( false );
-        }
-        catch ( IllegalArgumentException e )
-        {
-            assertTrue( true );
-        }
+        assertEquals( "&#x1d7ed &#x1d7ed", HtmlTools.unescapeHTML( "&#x1d7ed 
&#x1d7ed" ) );
+        assertEquals( "&#x1d7ed \uD835\uDFED", HtmlTools.unescapeHTML( 
"&#x1d7ed &#x1d7ed;" ) );
+        assertEquals( "&#xQWER;", HtmlTools.unescapeHTML( "&#xQWER;" ) );
     }
 
     /**


Reply via email to