Author: ltheussl
Date: Tue May  5 08:06:19 2009
New Revision: 771612

URL: http://svn.apache.org/viewvc?rev=771612&view=rev
Log:
[DOXIA-311] Character references do not work in xdoc section titles.
Partial fix: all HTML entities should be properly handled now; custom entities 
are still un-escaped in section titles.
Entities are now emitted as text (not rawText) by the XhtmlBaseParser; 
unrecognized entities are emitted as an unknown event.

Modified:
    maven/doxia/doxia/trunk/doxia-core/pom.xml
    
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java
    
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
    
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java
    
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
    
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java
    
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java
    
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java
    
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
    
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
    
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java

Modified: maven/doxia/doxia/trunk/doxia-core/pom.xml
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/pom.xml?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/pom.xml (original)
+++ maven/doxia/doxia/trunk/doxia-core/pom.xml Tue May  5 08:06:19 2009
@@ -55,6 +55,11 @@
       <artifactId>xercesImpl</artifactId>
       <version>2.8.1</version>
     </dependency>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+      <version>2.4</version>
+    </dependency>
 
     <!-- test -->
   </dependencies>

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java
 Tue May  5 08:06:19 2009
@@ -48,6 +48,13 @@
     /** An end HTML tag. Eg <code>&lt;/p&gt;</code>. */
     int TAG_TYPE_END = 3;
 
+    /**
+     * An HTML entity. Eg <code>&amp;lt;</code>.
+     *
+     * @since 1.1.1.
+     */
+    int ENTITY_TYPE = 4;
+
     // ----------------------------------------------------------------------
     // All XHTML 1.0 tags
     // ----------------------------------------------------------------------

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
 Tue May  5 08:06:19 2009
@@ -23,10 +23,12 @@
 import javax.swing.text.html.HTML.Tag;
 
 import org.apache.maven.doxia.macro.MacroExecutionException;
+import org.apache.maven.doxia.markup.HtmlMarkup;
 import org.apache.maven.doxia.sink.Sink;
 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
 import org.apache.maven.doxia.sink.SinkEventAttributes;
 import org.apache.maven.doxia.util.DoxiaUtils;
+import org.apache.maven.doxia.util.HtmlTools;
 
 import org.codehaus.plexus.util.StringUtils;
 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
@@ -467,13 +469,20 @@
         }
         else
         {
-            if ( getLocalEntities().containsKey( textChars ) )
+            String unescaped = HtmlTools.unescapeHtml( text );
+
+            // TODO: StringEscapeUtils.unescapeHtml returns unknown entities 
as is,
+            // they should be handled as one character as well
+            if ( text.equals( unescaped ) && text.length() > 1 )
             {
-                sink.rawText( text );
+                // this means the entity is unrecognized: emit as unknown
+                Object[] required = new Object[] { new Integer( 
HtmlMarkup.ENTITY_TYPE ) };
+
+                sink.unknown( text, required, null );
             }
             else
             {
-                sink.text( text );
+                sink.text( unescaped );
             }
         }
     }

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java
 Tue May  5 08:06:19 2009
@@ -1769,7 +1769,8 @@
         {
             getTextBuffer().append( text );
         }
-        else {
+        else
+        {
             write( text );
         }
     }
@@ -1797,6 +1798,22 @@
      */
     public void unknown( String name, Object[] requiredParams, 
SinkEventAttributes attributes )
     {
+        if ( requiredParams == null || !( requiredParams[0] instanceof Integer 
) )
+        {
+            getLog().warn( "Missing type information for unknown event: " + 
name + ", ignoring!" );
+
+            return;
+        }
+
+        int tagType = ( (Integer) requiredParams[0] ).intValue();
+
+        if ( tagType == ENTITY_TYPE )
+        {
+            rawText( name );
+
+            return;
+        }
+
         Tag tag = HtmlTools.getHtmlTag( name );
 
         if ( tag == null )
@@ -1805,13 +1822,6 @@
         }
         else
         {
-            if ( requiredParams == null || !( requiredParams[0] instanceof 
Integer ) )
-            {
-                throw new IllegalArgumentException( "Missing required 
parameter: TAG_TYPE" );
-            }
-
-            int tagType = ( (Integer) requiredParams[0] ).intValue();
-
             if ( tagType == TAG_TYPE_SIMPLE )
             {
                 writeSimpleTag( tag, attributes );
@@ -1878,7 +1888,7 @@
      */
     protected static String escapeHTML( String text )
     {
-        return HtmlTools.escapeHTML( text );
+        return HtmlTools.escapeHTML( text, false );
     }
 
     /**

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java
 Tue May  5 08:06:19 2009
@@ -25,6 +25,8 @@
 
 import javax.swing.text.html.HTML.Tag;
 
+import org.apache.commons.lang.StringEscapeUtils;
+
 import org.apache.maven.doxia.markup.HtmlMarkup;
 
 
@@ -173,6 +175,27 @@
     }
 
     /**
+     * Unescapes HTML entities in a string.
+     *
+     * <p> Unescapes a string containing entity escapes to a string
+     * containing the actual Unicode characters corresponding to the
+     * escapes. Supports HTML 4.0 entities.</p>
+     *
+     * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
+     * will become "&lt;Fran&ccedil;ais&gt;".</p>
+     *
+     * @param text the <code>String</code> to unescape, may be null.
+     *
+     * @return a new unescaped <code>String</code>, <code>null</code> if null 
string input.
+     *
+     * @since 1.1.1.
+     */
+    public static String unescapeHtml( String text )
+    {
+        return StringEscapeUtils.unescapeHtml( text );
+    }
+
+    /**
      * Encode an url
      *
      * @param url the String to encode, may be null

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java
 Tue May  5 08:06:19 2009
@@ -336,21 +336,20 @@
         assertEquals( "bold", event.getName() );
 
         event = (SinkEventElement) it.next();
-        assertEquals( "rawText", event.getName() );
-        assertEquals( "&#x159;",  (String) event.getArgs()[0] );
+        assertEquals( "text", event.getName() );
+        assertEquals( "\u0159",  (String) event.getArgs()[0] );
 
         event = (SinkEventElement) it.next();
-        assertEquals( "rawText", event.getName() );
-        assertEquals( "&nbsp;",  (String) event.getArgs()[0] );
+        assertEquals( "text", event.getName() );
+        assertEquals( "\u00A0",  (String) event.getArgs()[0] );
 
         event = (SinkEventElement) it.next();
-        assertEquals( "rawText", event.getName() );
-        assertEquals( "&#x161;",  (String) event.getArgs()[0] );
+        assertEquals( "text", event.getName() );
+        assertEquals( "\u0161",  (String) event.getArgs()[0] );
 
         event = (SinkEventElement) it.next();
-        // FIXME: DOXIA-310
-        //assertEquals( "rawText", event.getName() );
-        //assertEquals( "&#x1d7ed;",  (String) event.getArgs()[0] );
+        assertEquals( "unknown", event.getName() );
+        assertEquals( "&#x1d7ed;",  (String) event.getArgs()[0] );
 
         event = (SinkEventElement) it.next();
         assertEquals( "bold_", event.getName() );
@@ -361,7 +360,7 @@
         throws Exception
     {
         final String text = "<!DOCTYPE test [<!ENTITY foo \"&#x159;\"><!ENTITY 
tritPos \"&#x1d7ed;\">]>"
-                + 
"<body><h2>&amp;&foo;</h2><p>&amp;&foo;</p><p>&tritPos;</p></body>";
+                + 
"<body><h2>&amp;&foo;&tritPos;</h2><p>&amp;&foo;&tritPos;</p></body>";
 
         parser.setValidate( false );
         parser.parse( text, sink );
@@ -376,8 +375,12 @@
         assertEquals( "&", textEvt.getArgs()[0] );
 
         textEvt = (SinkEventElement) it.next();
-        assertEquals( "rawText", textEvt.getName() );
-        assertEquals( "&#x159;", textEvt.getArgs()[0] );
+        assertEquals( "text", textEvt.getName() );
+        assertEquals( "\u0159", textEvt.getArgs()[0] );
+
+        textEvt = (SinkEventElement) it.next();
+        assertEquals( "unknown", textEvt.getName() );
+        assertEquals( "&#x1d7ed;", textEvt.getArgs()[0] );
 
         assertEquals( "sectionTitle1_", ( (SinkEventElement) it.next() 
).getName() );
         assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName() 
);
@@ -387,15 +390,11 @@
         assertEquals( "&", textEvt.getArgs()[0] );
 
         textEvt = (SinkEventElement) it.next();
-        assertEquals( "rawText", textEvt.getName() );
-        assertEquals( "&#x159;", textEvt.getArgs()[0] );
-
-        assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName() 
);
-        //assertEquals( "section1_", ( (SinkEventElement) it.next() 
).getName() );
+        assertEquals( "text", textEvt.getName() );
+        assertEquals( "\u0159", textEvt.getArgs()[0] );
 
-        assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName() 
);
         textEvt = (SinkEventElement) it.next();
-        assertEquals( "rawText", textEvt.getName() );
+        assertEquals( "unknown", textEvt.getName() );
         assertEquals( "&#x1d7ed;", textEvt.getArgs()[0] );
         assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName() 
);
 

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java
 Tue May  5 08:06:19 2009
@@ -596,9 +596,8 @@
         sink.paragraph_();
 
         sink.paragraph();
-        sink.text( "Copyright symbol: " + COPYRIGHT + ", " + COPYRIGHT + ", " 
+ COPYRIGHT + "." );
+        sink.text( "Copyright symbol:" );
+        sink.text( "\u00a9" );
         sink.paragraph_();
     }
-
-    private static final char COPYRIGHT = '\u00a9';
 }

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java
 Tue May  5 08:06:19 2009
@@ -744,7 +744,7 @@
      */
     public void testText()
     {
-        String text = "a text";
+        String text = "a text & \u00c6";
 
         try
         {
@@ -756,7 +756,7 @@
             sink.close();
         }
 
-        assertEquals( "a text", writer.toString() );
+        assertEquals( "a text &amp; &#198;", writer.toString() );
 
         writer =  new StringWriter();
 
@@ -770,7 +770,7 @@
             sink.close();
         }
 
-        assertEquals( "a text", writer.toString() );
+        assertEquals( "a text &amp; &#198;", writer.toString() );
     }
 
     /**

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java
 Tue May  5 08:06:19 2009
@@ -51,6 +51,22 @@
     /**
      * Verify the expected results.
      */
+    public void testUnescapeHTML()
+    {
+        assertNull( HtmlTools.unescapeHtml( null ) );
+        assertEquals( "", HtmlTools.unescapeHtml( "" ) );
+        assertEquals( "<", HtmlTools.unescapeHtml( "&lt;" ) );
+        assertEquals( ">", HtmlTools.unescapeHtml( "&gt;" ) );
+        assertEquals( "&", HtmlTools.unescapeHtml( "&amp;" ) );
+        assertEquals( "\"", HtmlTools.unescapeHtml( "&quot;" ) );
+        assertEquals( "&amp;", HtmlTools.unescapeHtml( "&amp;amp;" ) );
+        assertEquals( "&lt;Fran&ccedil;ais&gt;", HtmlTools.unescapeHtml( 
"&amp;lt;Fran&amp;ccedil;ais&amp;gt;" ) );
+        assertEquals( "&#x12345;", HtmlTools.unescapeHtml( "&#x12345;" ) );
+    }
+
+    /**
+     * Verify the expected results.
+     */
     public void testEncodeId()
     {
         assertEquals( HtmlTools.encodeId( null ), null );

Modified: 
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
 Tue May  5 08:06:19 2009
@@ -36,6 +36,7 @@
 import org.apache.maven.doxia.parser.XhtmlBaseParser;
 import org.apache.maven.doxia.sink.Sink;
 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
+import org.apache.maven.doxia.util.HtmlTools;
 
 import org.codehaus.plexus.util.IOUtil;
 import org.codehaus.plexus.util.StringUtils;
@@ -184,7 +185,7 @@
 
             sink.sectionTitle( Sink.SECTION_LEVEL_1, attribs );
 
-            sink.text( parser.getAttributeValue( null, 
Attribute.NAME.toString() ) );
+            sink.text( HtmlTools.unescapeHtml( parser.getAttributeValue( null, 
Attribute.NAME.toString() ) ) );
 
             sink.sectionTitle1_();
         }
@@ -203,7 +204,7 @@
 
             sink.sectionTitle( Sink.SECTION_LEVEL_2, attribs );
 
-            sink.text( parser.getAttributeValue( null, 
Attribute.NAME.toString() ) );
+            sink.text( HtmlTools.unescapeHtml( parser.getAttributeValue( null, 
Attribute.NAME.toString() ) ) );
 
             sink.sectionTitle2_();
         }

Modified: 
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java?rev=771612&r1=771611&r2=771612&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
 Tue May  5 08:06:19 2009
@@ -454,8 +454,7 @@
 
         SinkEventElement textEvt = (SinkEventElement) it.next();
         assertEquals( "text", textEvt.getName() );
-        // FIXME: DOXIA-311
-        assertEquals( "&&#x159;", textEvt.getArgs()[0] );
+        assertEquals( "&\u0159", textEvt.getArgs()[0] );
 
         assertEquals( "sectionTitle1_", ( (SinkEventElement) it.next() 
).getName() );
         assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName() 
);
@@ -465,8 +464,8 @@
         assertEquals( "&", textEvt.getArgs()[0] );
 
         textEvt = (SinkEventElement) it.next();
-        assertEquals( "rawText", textEvt.getName() );
-        assertEquals( "&#x159;", textEvt.getArgs()[0] );
+        assertEquals( "text", textEvt.getName() );
+        assertEquals( "\u0159", textEvt.getArgs()[0] );
 
         assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName() 
);
         assertEquals( "section1_", ( (SinkEventElement) it.next() ).getName() 
);


Reply via email to