Author: ltheussl Date: Mon May 18 11:36:57 2009 New Revision: 775901 URL: http://svn.apache.org/viewvc?rev=775901&view=rev Log: Move default implementations for handleText, handleCdsect, handleComment, handleEntity and handleUnknown from XhtmlBaseParser into AbstractXmlParser. Only handleStartTag and handleEndTag, which are parser-specific, remain abstract.
Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java?rev=775901&r1=775900&r2=775901&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java Mon May 18 11:36:57 2009 @@ -41,15 +41,19 @@ import org.apache.maven.doxia.logging.Log; import org.apache.maven.doxia.macro.MacroExecutionException; +import org.apache.maven.doxia.markup.HtmlMarkup; import org.apache.maven.doxia.markup.XmlMarkup; import org.apache.maven.doxia.sink.Sink; import org.apache.maven.doxia.sink.SinkEventAttributeSet; +import org.apache.maven.doxia.util.HtmlTools; + import org.codehaus.plexus.util.FileUtils; import org.codehaus.plexus.util.IOUtil; import org.codehaus.plexus.util.StringUtils; import org.codehaus.plexus.util.xml.pull.MXParser; import org.codehaus.plexus.util.xml.pull.XmlPullParser; import org.codehaus.plexus.util.xml.pull.XmlPullParserException; + import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import org.xml.sax.SAXException; @@ -305,42 +309,129 @@ /** * Handles text events. * + * <p>This is a default implementation, if the parser points to a non-empty text element, + * it is emitted as a text event into the specified sink.</p> + * * @param parser A parser, not null. - * @param sink the sink to receive the events. + * @param sink the sink to receive the events. Not null. 
* @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model */ - protected abstract void handleText( XmlPullParser parser, Sink sink ) - throws XmlPullParserException; + protected void handleText( XmlPullParser parser, Sink sink ) + throws XmlPullParserException + { + String text = getText( parser ); + + /* + * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the + * parser so any whitespace that makes it here is significant. + */ + if ( StringUtils.isNotEmpty( text ) ) + { + sink.text( text ); + } + } /** * Handles CDATA sections. * + * <p>This is a default implementation, all data are emitted as text + * events into the specified sink.</p> + * * @param parser A parser, not null. - * @param sink the sink to receive the events. + * @param sink the sink to receive the events. Not null. * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model */ - protected abstract void handleCdsect( XmlPullParser parser, Sink sink ) - throws XmlPullParserException; + protected void handleCdsect( XmlPullParser parser, Sink sink ) + throws XmlPullParserException + { + sink.text( getText( parser ) ); + } /** * Handles comments. * + * <p>This is a default implementation, all data are emitted as comment + * events into the specified sink.</p> + * * @param parser A parser, not null. - * @param sink the sink to receive the events. + * @param sink the sink to receive the events. Not null. * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model */ - protected abstract void handleComment( XmlPullParser parser, Sink sink ) - throws XmlPullParserException; + protected void handleComment( XmlPullParser parser, Sink sink ) + throws XmlPullParserException + { + sink.comment( getText( parser ).trim() ); + } /** * Handles entities. 
* + * <p>This is a default implementation, all entities are resolved and emitted as text + * events into the specified sink, except:</p> + * <ul> + * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code> + * are emitted as <code>nonBreakingSpace()</code> events.</li> + * <li>if an entity cannot be resolved, it is emitted as an <code>unknown()</code> event, + * with a required parameter that contains {@link HtmlMarkup#ENTITY_TYPE} as first argument.</li> + * </ul> + * * @param parser A parser, not null. - * @param sink the sink to receive the events. + * @param sink the sink to receive the events. Not null. * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model */ - protected abstract void handleEntity( XmlPullParser parser, Sink sink ) - throws XmlPullParserException; + protected void handleEntity( XmlPullParser parser, Sink sink ) + throws XmlPullParserException + { + String text = getText( parser ); + + String name = parser.getName(); + + if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) ) + { + sink.nonBreakingSpace(); + } + else + { + String unescaped = HtmlTools.unescapeHtml( text ); + + // TODO: StringEscapeUtils.unescapeHtml returns unknown entities as is, + // they should be handled as one character as well + if ( text.equals( unescaped ) && text.length() > 1 ) + { + // this means the entity is unrecognized: emit as unknown + Object[] required = new Object[] { new Integer( HtmlMarkup.ENTITY_TYPE ) }; + + sink.unknown( text, required, null ); + } + else + { + sink.text( unescaped ); + } + } + } + + /** + * Handles an unkown event. + * + * <p>This is a default implementation, all events are emitted as unknown + * events into the specified sink.</p> + * + * @param parser the parser to get the event from. + * @param sink the sink to receive the event. + * @param type the tag event type. 
This should be one of HtmlMarkup.TAG_TYPE_SIMPLE, + * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE. + * It will be passed as the first argument of the required parameters to the Sink + * {@link org.apache.maven.doxia.sink.Sink#unknown(String, Object[], SinkEventAttributes)} + * method. + */ + protected void handleUnknown( XmlPullParser parser, Sink sink, int type ) + { + Object[] required = new Object[] { new Integer( type ) }; + + SinkEventAttributeSet attribs = getAttributesFromParser( parser ); + + sink.unknown( parser.getName(), required, attribs ); + } /** * <p>isIgnorableWhitespace</p> Modified: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java?rev=775901&r1=775900&r2=775901&view=diff ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java (original) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java Mon May 18 11:36:57 2009 @@ -23,12 +23,10 @@ import javax.swing.text.html.HTML.Tag; import org.apache.maven.doxia.macro.MacroExecutionException; -import org.apache.maven.doxia.markup.HtmlMarkup; import org.apache.maven.doxia.sink.Sink; import org.apache.maven.doxia.sink.SinkEventAttributeSet; import org.apache.maven.doxia.sink.SinkEventAttributes; import org.apache.maven.doxia.util.DoxiaUtils; -import org.apache.maven.doxia.util.HtmlTools; import org.codehaus.plexus.util.StringUtils; import org.codehaus.plexus.util.xml.pull.XmlPullParser; @@ -431,13 +429,6 @@ } /** {@inheritDoc} */ - protected void handleCdsect( XmlPullParser parser, Sink sink ) - throws XmlPullParserException - { - sink.text( getText( parser ) ); - } - - /** {@inheritDoc} */ protected void 
handleComment( XmlPullParser parser, Sink sink ) throws XmlPullParserException { @@ -453,60 +444,6 @@ } } - /** {@inheritDoc} */ - protected void handleEntity( XmlPullParser parser, Sink sink ) - throws XmlPullParserException - { - String text = getText( parser ); - - int[] holder = new int[] {0, 0}; - char[] chars = parser.getTextCharacters( holder ); - String textChars = String.valueOf( chars, holder[0], holder[1] ); - - if ( "#160".equals( textChars ) || "nbsp".equals( textChars ) ) - { - sink.nonBreakingSpace(); - } - else - { - String unescaped = HtmlTools.unescapeHtml( text ); - - // TODO: StringEscapeUtils.unescapeHtml returns unknown entities as is, - // they should be handled as one character as well - if ( text.equals( unescaped ) && text.length() > 1 ) - { - // this means the entity is unrecognized: emit as unknown - Object[] required = new Object[] { new Integer( HtmlMarkup.ENTITY_TYPE ) }; - - sink.unknown( text, required, null ); - } - else - { - sink.text( unescaped ); - } - } - } - - /** - * Handles an unkown event. - * - * @param parser the parser to get the event from. - * @param sink the sink to receive the event. - * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE, - * HtmlMarkup.TAG_TYPE_START or HtmlMarkup.TAG_TYPE_END. It will be passed as the first - * argument of the required parameters to the Sink - * {@link org.apache.maven.doxia.sink.Sink#unknown(String, Object[], SinkEventAttributes)} - * method. - */ - protected void handleUnknown( XmlPullParser parser, Sink sink, int type ) - { - Object[] required = new Object[] { new Integer( type ) }; - - SinkEventAttributeSet attribs = getAttributesFromParser( parser ); - - sink.unknown( parser.getName(), required, attribs ); - } - /** * Make sure sections are nested consecutively. *