Author: ltheussl Date: Tue May 5 08:06:19 2009 New Revision: 771612 URL: http://svn.apache.org/viewvc?rev=771612&view=rev Log: [DOXIA-311] Character references do not work in xdoc section titles. Partial fix: all HTML entities should be properly handled now, but custom entities are still un-escaped in section titles. Entities are now emitted as text (not rawText) by the XhtmlBaseParser; unrecognized entities are emitted as an unknown event.
Modified: maven/doxia/doxia/trunk/doxia-core/pom.xml maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java Modified: maven/doxia/doxia/trunk/doxia-core/pom.xml URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/pom.xml?rev=771612&r1=771611&r2=771612&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/pom.xml (original) +++ maven/doxia/doxia/trunk/doxia-core/pom.xml Tue May 5 08:06:19 2009 @@ -55,6 +55,11 @@ <artifactId>xercesImpl</artifactId> <version>2.8.1</version> </dependency> + <dependency> + <groupId>commons-lang</groupId> + <artifactId>commons-lang</artifactId> + <version>2.4</version> + </dependency> <!-- test --> </dependencies> Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java?rev=771612&r1=771611&r2=771612&view=diff 
============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/markup/HtmlMarkup.java Tue May 5 08:06:19 2009 @@ -48,6 +48,13 @@ /** An end HTML tag. Eg <code></p></code>. */ int TAG_TYPE_END = 3; + /** + * An HTML entity. Eg <code>&lt;</code>. + * + * @since 1.1.1. + */ + int ENTITY_TYPE = 4; + // ---------------------------------------------------------------------- // All XHTML 1.0 tags // ---------------------------------------------------------------------- Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java?rev=771612&r1=771611&r2=771612&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java Tue May 5 08:06:19 2009 @@ -23,10 +23,12 @@ import javax.swing.text.html.HTML.Tag; import org.apache.maven.doxia.macro.MacroExecutionException; +import org.apache.maven.doxia.markup.HtmlMarkup; import org.apache.maven.doxia.sink.Sink; import org.apache.maven.doxia.sink.SinkEventAttributeSet; import org.apache.maven.doxia.sink.SinkEventAttributes; import org.apache.maven.doxia.util.DoxiaUtils; +import org.apache.maven.doxia.util.HtmlTools; import org.codehaus.plexus.util.StringUtils; import org.codehaus.plexus.util.xml.pull.XmlPullParser; @@ -467,13 +469,20 @@ } else { - if ( getLocalEntities().containsKey( textChars ) ) + String unescaped = HtmlTools.unescapeHtml( text ); + + // TODO: StringEscapeUtils.unescapeHtml returns unknown 
entities as is, + // they should be handled as one character as well + if ( text.equals( unescaped ) && text.length() > 1 ) { - sink.rawText( text ); + // this means the entity is unrecognized: emit as unknown + Object[] required = new Object[] { new Integer( HtmlMarkup.ENTITY_TYPE ) }; + + sink.unknown( text, required, null ); } else { - sink.text( text ); + sink.text( unescaped ); } } } Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java?rev=771612&r1=771611&r2=771612&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/sink/XhtmlBaseSink.java Tue May 5 08:06:19 2009 @@ -1769,7 +1769,8 @@ { getTextBuffer().append( text ); } - else { + else + { write( text ); } } @@ -1797,6 +1798,22 @@ */ public void unknown( String name, Object[] requiredParams, SinkEventAttributes attributes ) { + if ( requiredParams == null || !( requiredParams[0] instanceof Integer ) ) + { + getLog().warn( "Missing type information for unknown event: " + name + ", ignoring!" 
); + + return; + } + + int tagType = ( (Integer) requiredParams[0] ).intValue(); + + if ( tagType == ENTITY_TYPE ) + { + rawText( name ); + + return; + } + Tag tag = HtmlTools.getHtmlTag( name ); if ( tag == null ) @@ -1805,13 +1822,6 @@ } else { - if ( requiredParams == null || !( requiredParams[0] instanceof Integer ) ) - { - throw new IllegalArgumentException( "Missing required parameter: TAG_TYPE" ); - } - - int tagType = ( (Integer) requiredParams[0] ).intValue(); - if ( tagType == TAG_TYPE_SIMPLE ) { writeSimpleTag( tag, attributes ); @@ -1878,7 +1888,7 @@ */ protected static String escapeHTML( String text ) { - return HtmlTools.escapeHTML( text ); + return HtmlTools.escapeHTML( text, false ); } /** Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java?rev=771612&r1=771611&r2=771612&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/util/HtmlTools.java Tue May 5 08:06:19 2009 @@ -25,6 +25,8 @@ import javax.swing.text.html.HTML.Tag; +import org.apache.commons.lang.StringEscapeUtils; + import org.apache.maven.doxia.markup.HtmlMarkup; @@ -173,6 +175,27 @@ } /** + * Unescapes HTML entities in a string. + * + * <p> Unescapes a string containing entity escapes to a string + * containing the actual Unicode characters corresponding to the + * escapes. Supports HTML 4.0 entities.</p> + * + * <p>For example, the string "&lt;Fran&ccedil;ais&gt;" + * will become "<Français>".</p> + * + * @param text the <code>String</code> to unescape, may be null. + * + * @return a new unescaped <code>String</code>, <code>null</code> if null string input. + * + * @since 1.1.1. 
+ */ + public static String unescapeHtml( String text ) + { + return StringEscapeUtils.unescapeHtml( text ); + } + + /** * Encode an url * * @param url the String to encode, may be null Modified: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java?rev=771612&r1=771611&r2=771612&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/parser/XhtmlBaseParserTest.java Tue May 5 08:06:19 2009 @@ -336,21 +336,20 @@ assertEquals( "bold", event.getName() ); event = (SinkEventElement) it.next(); - assertEquals( "rawText", event.getName() ); - assertEquals( "ř", (String) event.getArgs()[0] ); + assertEquals( "text", event.getName() ); + assertEquals( "\u0159", (String) event.getArgs()[0] ); event = (SinkEventElement) it.next(); - assertEquals( "rawText", event.getName() ); - assertEquals( " ", (String) event.getArgs()[0] ); + assertEquals( "text", event.getName() ); + assertEquals( "\u00A0", (String) event.getArgs()[0] ); event = (SinkEventElement) it.next(); - assertEquals( "rawText", event.getName() ); - assertEquals( "š", (String) event.getArgs()[0] ); + assertEquals( "text", event.getName() ); + assertEquals( "\u0161", (String) event.getArgs()[0] ); event = (SinkEventElement) it.next(); - // FIXME: DOXIA-310 - //assertEquals( "rawText", event.getName() ); - //assertEquals( "𝟭", (String) event.getArgs()[0] ); + assertEquals( "unknown", event.getName() ); + assertEquals( "𝟭", (String) event.getArgs()[0] ); event = (SinkEventElement) it.next(); assertEquals( "bold_", event.getName() ); @@ -361,7 +360,7 @@ throws Exception { final String text = "<!DOCTYPE test 
[<!ENTITY foo \"ř\"><!ENTITY tritPos \"𝟭\">]>" - + "<body><h2>&&foo;</h2><p>&&foo;</p><p>&tritPos;</p></body>"; + + "<body><h2>&&foo;&tritPos;</h2><p>&&foo;&tritPos;</p></body>"; parser.setValidate( false ); parser.parse( text, sink ); @@ -376,8 +375,12 @@ assertEquals( "&", textEvt.getArgs()[0] ); textEvt = (SinkEventElement) it.next(); - assertEquals( "rawText", textEvt.getName() ); - assertEquals( "ř", textEvt.getArgs()[0] ); + assertEquals( "text", textEvt.getName() ); + assertEquals( "\u0159", textEvt.getArgs()[0] ); + + textEvt = (SinkEventElement) it.next(); + assertEquals( "unknown", textEvt.getName() ); + assertEquals( "𝟭", textEvt.getArgs()[0] ); assertEquals( "sectionTitle1_", ( (SinkEventElement) it.next() ).getName() ); assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName() ); @@ -387,15 +390,11 @@ assertEquals( "&", textEvt.getArgs()[0] ); textEvt = (SinkEventElement) it.next(); - assertEquals( "rawText", textEvt.getName() ); - assertEquals( "ř", textEvt.getArgs()[0] ); - - assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName() ); - //assertEquals( "section1_", ( (SinkEventElement) it.next() ).getName() ); + assertEquals( "text", textEvt.getName() ); + assertEquals( "\u0159", textEvt.getArgs()[0] ); - assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName() ); textEvt = (SinkEventElement) it.next(); - assertEquals( "rawText", textEvt.getName() ); + assertEquals( "unknown", textEvt.getName() ); assertEquals( "𝟭", textEvt.getArgs()[0] ); assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName() ); Modified: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java?rev=771612&r1=771611&r2=771612&view=diff ============================================================================== --- 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/SinkTestDocument.java Tue May 5 08:06:19 2009 @@ -596,9 +596,8 @@ sink.paragraph_(); sink.paragraph(); - sink.text( "Copyright symbol: " + COPYRIGHT + ", " + COPYRIGHT + ", " + COPYRIGHT + "." ); + sink.text( "Copyright symbol:" ); + sink.text( "\u00a9" ); sink.paragraph_(); } - - private static final char COPYRIGHT = '\u00a9'; } Modified: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java?rev=771612&r1=771611&r2=771612&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/sink/XhtmlBaseSinkTest.java Tue May 5 08:06:19 2009 @@ -744,7 +744,7 @@ */ public void testText() { - String text = "a text"; + String text = "a text & \u00c6"; try { @@ -756,7 +756,7 @@ sink.close(); } - assertEquals( "a text", writer.toString() ); + assertEquals( "a text & Æ", writer.toString() ); writer = new StringWriter(); @@ -770,7 +770,7 @@ sink.close(); } - assertEquals( "a text", writer.toString() ); + assertEquals( "a text & Æ", writer.toString() ); } /** Modified: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java?rev=771612&r1=771611&r2=771612&view=diff ============================================================================== --- 
maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/util/HtmlToolsTest.java Tue May 5 08:06:19 2009 @@ -51,6 +51,22 @@ /** * Verify the expected results. */ + public void testUnescapeHTML() + { + assertNull( HtmlTools.unescapeHtml( null ) ); + assertEquals( "", HtmlTools.unescapeHtml( "" ) ); + assertEquals( "<", HtmlTools.unescapeHtml( "&lt;" ) ); + assertEquals( ">", HtmlTools.unescapeHtml( "&gt;" ) ); + assertEquals( "&", HtmlTools.unescapeHtml( "&amp;" ) ); + assertEquals( "\"", HtmlTools.unescapeHtml( "&quot;" ) ); + assertEquals( "&amp;", HtmlTools.unescapeHtml( "&amp;amp;" ) ); + assertEquals( "&lt;Fran&ccedil;ais&gt;", HtmlTools.unescapeHtml( "&amp;lt;Fran&amp;ccedil;ais&amp;gt;" ) ); + assertEquals( "&#x12345;", HtmlTools.unescapeHtml( "&#x12345;" ) ); + } + + /** + * Verify the expected results. + */ public void testEncodeId() { assertEquals( HtmlTools.encodeId( null ), null ); Modified: maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java?rev=771612&r1=771611&r2=771612&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java (original) +++ maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java Tue May 5 08:06:19 2009 @@ -36,6 +36,7 @@ import org.apache.maven.doxia.parser.XhtmlBaseParser; import org.apache.maven.doxia.sink.Sink; import org.apache.maven.doxia.sink.SinkEventAttributeSet; +import org.apache.maven.doxia.util.HtmlTools; import org.codehaus.plexus.util.IOUtil; import org.codehaus.plexus.util.StringUtils; @@ -184,7 +185,7 @@ 
sink.sectionTitle( Sink.SECTION_LEVEL_1, attribs ); - sink.text( parser.getAttributeValue( null, Attribute.NAME.toString() ) ); + sink.text( HtmlTools.unescapeHtml( parser.getAttributeValue( null, Attribute.NAME.toString() ) ) ); sink.sectionTitle1_(); } @@ -203,7 +204,7 @@ sink.sectionTitle( Sink.SECTION_LEVEL_2, attribs ); - sink.text( parser.getAttributeValue( null, Attribute.NAME.toString() ) ); + sink.text( HtmlTools.unescapeHtml( parser.getAttributeValue( null, Attribute.NAME.toString() ) ) ); sink.sectionTitle2_(); } Modified: maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java?rev=771612&r1=771611&r2=771612&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java (original) +++ maven/doxia/doxia/trunk/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java Tue May 5 08:06:19 2009 @@ -454,8 +454,7 @@ SinkEventElement textEvt = (SinkEventElement) it.next(); assertEquals( "text", textEvt.getName() ); - // FIXME: DOXIA-311 - assertEquals( "&ř", textEvt.getArgs()[0] ); + assertEquals( "&\u0159", textEvt.getArgs()[0] ); assertEquals( "sectionTitle1_", ( (SinkEventElement) it.next() ).getName() ); assertEquals( "paragraph", ( (SinkEventElement) it.next() ).getName() ); @@ -465,8 +464,8 @@ assertEquals( "&", textEvt.getArgs()[0] ); textEvt = (SinkEventElement) it.next(); - assertEquals( "rawText", textEvt.getName() ); - assertEquals( "ř", textEvt.getArgs()[0] ); + assertEquals( "text", textEvt.getName() ); + assertEquals( "\u0159", textEvt.getArgs()[0] ); assertEquals( "paragraph_", ( (SinkEventElement) it.next() ).getName() ); 
assertEquals( "section1_", ( (SinkEventElement) it.next() ).getName() );