Author: vsiveton
Date: Sun Nov  2 16:51:25 2008
New Revision: 709994

URL: http://svn.apache.org/viewvc?rev=709994&view=rev
Log:
DOXIA-250: Xml parser should handle entities defined in doctype

o fixed the AbstractXmlParser to handle defined entities
o added a test case

Modified:
    
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
    
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
    
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xhtml/src/test/java/org/apache/maven/doxia/module/xhtml/XhtmlParserTest.java

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java?rev=709994&r1=709993&r2=709994&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
 Sun Nov  2 16:51:25 2008
@@ -21,12 +21,15 @@
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.maven.doxia.macro.MacroExecutionException;
 import org.apache.maven.doxia.markup.XmlMarkup;
 import org.apache.maven.doxia.sink.Sink;
 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
-
 import org.codehaus.plexus.util.StringUtils;
 import org.codehaus.plexus.util.xml.pull.MXParser;
 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
@@ -43,12 +46,22 @@
     extends AbstractParser
     implements XmlMarkup
 {
+    /** Entity pattern for HTML entity, i.e. &#38;nbsp; */
+    private static final Pattern PATTERN_ENTITY_1 =
+        Pattern.compile( 
"<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
+
+    /** Entity pattern for Unicode entity, i.e. &#38;#38; */
+    private static final Pattern PATTERN_ENTITY_2 =
+        Pattern.compile( 
"<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&#x?[0-9a-fA-F]{1,4};)(\\s)*\"(\\s)*>"
 );
+
     private boolean ignorable;
 
     private boolean collapsible;
 
     private boolean trimmable;
 
+    private Map entities;
+
     /** {@inheritDoc} */
     public void parse( Reader source, Sink sink )
         throws ParseException
@@ -65,8 +78,8 @@
         }
         catch ( XmlPullParserException ex )
         {
-            throw new ParseException( "Error parsing the model: " + 
ex.getMessage(), ex, ex.getLineNumber(), ex
-                .getColumnNumber() );
+            throw new ParseException( "Error parsing the model: " + 
ex.getMessage(), ex, ex.getLineNumber(),
+                                      ex.getColumnNumber() );
         }
         catch ( MacroExecutionException ex )
         {
@@ -180,7 +193,44 @@
             }
             else if ( eventType == XmlPullParser.DOCDECL )
             {
-                // nop
+                String text = parser.getText();
+                int entitiesCount = StringUtils.countMatches( text, "<!ENTITY" 
);
+                // entities defined in a local doctype
+                if ( entitiesCount > 0 )
+                {
+                    int start = text.indexOf( "<" );
+                    int end = text.lastIndexOf( ">" );
+                    if ( start != -1 && end != -1 )
+                    {
+                        text = text.substring( start, end + 1 );
+                        for ( int i = 0; i < entitiesCount; i++ )
+                        {
+                            String tmp = text.substring( text.indexOf( "<" ), 
text.indexOf( ">" ) + 1 );
+                            Matcher matcher = PATTERN_ENTITY_1.matcher( tmp );
+                            if ( matcher.find() && matcher.groupCount() == 7 )
+                            {
+                                String entityName = matcher.group( 2 );
+                                String entityValue = matcher.group( 5 );
+
+                                parser.defineEntityReplacementText( 
entityName, entityValue );
+                                getLocalEntities().put( entityName, 
entityValue );
+                            }
+                            else
+                            {
+                                matcher = PATTERN_ENTITY_2.matcher( text );
+                                if ( matcher.find() && matcher.groupCount() == 
7 )
+                                {
+                                    String entityName = matcher.group( 2 );
+                                    String entityValue = matcher.group( 5 );
+
+                                    parser.defineEntityReplacementText( 
entityName, entityValue );
+                                    getLocalEntities().put( entityName, 
entityValue );
+                                }
+                            }
+                            text = StringUtils.replace( text, tmp, "" ).trim();
+                        }
+                    }
+                }
             }
 
             try
@@ -355,4 +405,25 @@
 
         return text;
     }
+
+    /**
+     * Return the entities defined in a local doctype, for instance:
+     * <pre>
+     * &lt;!DOCTYPE foo [
+     *   &lt;!ENTITY bar "&#38;#x160;"&gt;
+     *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
+     * ]&gt;
+     * </pre>
+     * The backing map is created on the first call, so this method never returns <code>null</code>.
+     * @return a map of the defined entities in a local doctype.
+     */
+    protected Map getLocalEntities()
+    {
+        if ( entities == null )
+        {
+            entities = new LinkedHashMap();
+        }
+
+        return entities;
+    }
 }

Modified: 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java?rev=709994&r1=709993&r2=709994&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
 Sun Nov  2 16:51:25 2008
@@ -698,7 +698,14 @@
         }
         else
         {
-            sink.text( text );
+            if ( getLocalEntities().containsKey( textChars ) )
+            {
+                sink.rawText( text );
+            }
+            else
+            {
+                sink.text( text );
+            }
         }
     }
 

Modified: 
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xhtml/src/test/java/org/apache/maven/doxia/module/xhtml/XhtmlParserTest.java
URL: 
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-xhtml/src/test/java/org/apache/maven/doxia/module/xhtml/XhtmlParserTest.java?rev=709994&r1=709993&r2=709994&view=diff
==============================================================================
--- 
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xhtml/src/test/java/org/apache/maven/doxia/module/xhtml/XhtmlParserTest.java
 (original)
+++ 
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xhtml/src/test/java/org/apache/maven/doxia/module/xhtml/XhtmlParserTest.java
 Sun Nov  2 16:51:25 2008
@@ -19,10 +19,12 @@
  * under the License.
  */
 
+import java.io.StringWriter;
 import java.util.Iterator;
 
 import org.apache.maven.doxia.parser.AbstractParserTest;
 import org.apache.maven.doxia.parser.Parser;
+import org.apache.maven.doxia.sink.Sink;
 import org.apache.maven.doxia.sink.SinkEventElement;
 import org.apache.maven.doxia.sink.SinkEventTestingSink;
 
@@ -104,4 +106,32 @@
         assertFalse( it.hasNext() );
     }
 
+    /**
+     * Test for DOXIA-250: entities declared in a local doctype.
+     * Builds a document whose DOCTYPE declares the entities <code>foo</code>, <code>foo1</code> and
+     * <code>foo2</code>, each declaration being spread over several lines with surrounding whitespace,
+     * parses it into an XhtmlSink and checks that the three declared values show up in the output.
+     *
+     * @throws Exception if any
+     */
+    public void testDoxia250()
+        throws Exception
+    {
+        StringBuffer sb = new StringBuffer();
+        sb.append( "<!DOCTYPE test [" ).append( EOL );
+        sb.append( "<!ENTITY   " ).append( EOL ).append( "   foo   " ).append( 
EOL ).append( "   \"   " )
+          .append( EOL ).append( "   &#x159;   " ).append( EOL ).append( "   
\">" ).append( EOL );
+        sb.append( "<!ENTITY   " ).append( EOL ).append( "   foo1   " 
).append( EOL ).append( "   \"   " )
+          .append( EOL ).append( "   &nbsp;   " ).append( EOL ).append( "   
\">" ).append( EOL );
+        sb.append( "<!ENTITY   " ).append( EOL ).append( "   foo2   " 
).append( EOL ).append( "  \"   " )
+          .append( EOL ).append( "   &#x161;   " ).append( EOL ).append( "   
\">" ).append( EOL );
+        sb.append( "]>" ).append( EOL );
+        sb.append( "<html><body>&foo;&foo1;&foo2;</body></html>" );
+
+        String text = sb.toString();
+        StringWriter w = new StringWriter();
+        Sink sink = new XhtmlSink( w );
+        // Should fail when fixing DOXIA-263 I guess.
+        ( (XhtmlParser) createParser() ).parse( text.toString(), sink );
+        String result = w.toString();
+
+        assertTrue( result.indexOf( "&#x159;" ) != -1 );
+        assertTrue( result.indexOf( "&nbsp;" ) != -1 );
+        assertTrue( result.indexOf( "&#x161;" ) != -1 );
+    }
 }


Reply via email to