Author: ltheussl Date: Tue Jun 2 09:43:33 2009 New Revision: 780986 URL: http://svn.apache.org/viewvc?rev=780986&view=rev Log: Add a DocumentModelSink to extract meta information from documents.
Added: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java (with props) maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java (with props) Added: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java?rev=780986&view=auto ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java (added) +++ maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java Tue Jun 2 09:43:33 2009 @@ -0,0 +1,328 @@ +package org.apache.maven.doxia.document; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.text.ParseException; + +import java.util.Date; +import java.util.Enumeration; +import java.util.Locale; + +import javax.swing.text.html.HTML.Attribute; + +import org.apache.maven.doxia.sink.SinkAdapter; +import org.apache.maven.doxia.sink.SinkEventAttributes; +import org.apache.maven.doxia.util.DoxiaUtils; + +import org.codehaus.plexus.util.StringUtils; + +/** + * A Sink that collects meta-information emitted by a parser and stores it in a DocumentModel. + * + * <p>Use like:</p> + * + * <pre> + * DocumentModelSink sink = new DocumentModelSink(); + * parser.parse( reader, sink ); + * DocumentModel model = sink.getModel(); + * </pre> + * + * <p>The sink only collects information from the <code>title()</code>, <code>author</code> + * and <code>date</code> events, as well as meta-information emitted via <code>unknown()</code>, + * all other events are ignored.</p> + * + * @author ltheussl + * @version $Id$ + * @since 1.1.1. + */ + +public class DocumentModelSink + extends SinkAdapter +{ + private final DocumentModel model; + + private StringBuffer buffer; + private DocumentAuthor author; + + /** + * Create a DocumentModelSink. + */ + public DocumentModelSink() + { + this.model = new DocumentModel(); + model.setMeta( new DocumentMeta() ); + } + + /** + * Retrieve the DocumentModel created by this Sink. + * + * @return the DocumentModel. + */ + public DocumentModel getModel() + { + return model; + } + + /** Start recording a title. */ + public void title() + { + title( null ); + } + + /** + * Start recording a title. Only text events within a title event are recorded. + * + * @param attributes ignored. + */ + public void title( SinkEventAttributes attributes ) + { + this.buffer = new StringBuffer(); + } + + /** End recording a title. */ + public void title_() + { + String title = buffer.toString(); + + if ( StringUtils.isNotEmpty( title ) ) + { + getModel().getMeta().setTitle( buffer.toString() ); + } + + this.buffer = null; + } + + /** Start recording an author. */ + public void author() + { + author( null ); + } + + /** + * Start recording an author. Only text events within an author event are recorded. + * + * @param attributes only email attribute is recognized. + */ + public void author( SinkEventAttributes attributes ) + { + this.buffer = new StringBuffer(); + this.author = new DocumentAuthor(); + + if ( attributes != null ) + { + for ( Enumeration e = attributes.getAttributeNames() ; e.hasMoreElements() ; ) + { + String name = e.nextElement().toString(); + + if ( name.equals( SinkEventAttributes.EMAIL ) ) + { + author.setEmail( attributes.getAttribute( name ).toString() ); + } + else + { + getLog().warn( "Ignoring unknown author attribute: " + name ); + } + } + } + } + + /** End recording an author. */ + public void author_() + { + String auth = buffer.toString(); + + if ( StringUtils.isNotEmpty( auth ) ) + { + author.setName( buffer.toString() ); + model.getMeta().addAuthor( author ); + } + + this.author = null; + this.buffer = null; + } + + /** Start recording a date. */ + public void date() + { + date( null ); + } + + /** + * Start recording a date. Only text events within a date event are recorded. + * + * @param attributes ignored. + */ + public void date( SinkEventAttributes attributes ) + { + this.buffer = new StringBuffer(); + } + + /** End recording a date. */ + public void date_() + { + String dat = buffer.toString(); + + if ( StringUtils.isNotEmpty( dat ) ) + { + try + { + Date date = DoxiaUtils.parseDate( buffer.toString() ); + model.getMeta().setDate( date ); + } + catch ( ParseException ex ) + { + getLog().warn( "Could not parse date: " + this.buffer.toString(), ex ); + } + } + + this.buffer = null; + } + + /** + * Record a text. + * + * @param text the text to record. + */ + public void text( String text ) + { + text( text, null ); + } + + /** + * Record a text. + * + * @param text the text to record. + * @param attributes ignored. + */ + public void text( String text, SinkEventAttributes attributes ) + { + if ( this.buffer != null && StringUtils.isNotEmpty( text ) ) + { + this.buffer.append( text ); + } + } + + /** + * Record a text. + * + * @param text the text to record. + */ + public void rawText( String text ) + { + if ( this.buffer != null && StringUtils.isNotEmpty( text ) ) + { + this.buffer.append( text ); + } + } + + /** + * Record an unknown event. Only "meta" events are currently recognized. + * + * @param name the name of the event. If this is not "meta", the event is ignored. + * @param requiredParams ignored. + * @param attributes has to contain "name" and "content" attributes. + */ + public void unknown( String name, Object[] requiredParams, SinkEventAttributes attributes ) + { + if ( "meta".equals( name ) ) + { + Object metaName = attributes.getAttribute( Attribute.NAME.toString() ); + Object metaContent = attributes.getAttribute( Attribute.CONTENT.toString() ); + + if ( metaName == null || metaContent == null ) + { + getLog().warn( "Missing name and/or content in meta, ignoring!" ); + return; + } + + handleMeta( metaName.toString().toLowerCase( Locale.ENGLISH ), + metaContent.toString().toLowerCase( Locale.ENGLISH ) ); + } + } + + private boolean handleMeta( String name, String content ) + { + if ( "author".equals( name ) ) + { + this.author( null ); + this.text( content ); + this.author_(); + } + else if ( "date".equals( name ) ) + { + this.date( null ); + this.text( content ); + this.date_(); + } + else if ( "keywords".equals( name ) ) + { + String[] keywords = StringUtils.split( content, "," ); + + for ( int i = 0; i < keywords.length; i++ ) + { + model.getMeta().addKeyWord( keywords[i].trim() ); + } + } + else if ( "description".equals( name ) ) + { + model.getMeta().setDescription( content ); + } + else if ( "generator".equals( name ) ) + { + model.getMeta().setGenerator( content ); + } + else if ( "language".equals( name ) || "lang".equals( name ) ) + { + model.getMeta().setLanguage( content ); + } + else if ( "creator".equals( name ) ) + { + model.getMeta().setCreator( content ); + } + else if ( "creation_date".equals( name ) ) + { + try + { + model.getMeta().setCreationDate( DoxiaUtils.parseDate( content ) ); + } + catch ( ParseException ex ) + { + getLog().warn( "Could not parse date: " + content, ex ); + } + } + else if ( "date-creation-yyyymmdd".equals( name ) ) + { + try + { + model.getMeta().setCreationDate( DoxiaUtils.parseDate( content ) ); + } + catch ( ParseException ex ) + { + getLog().warn( "Could not parse date: " + content, ex ); + } + } + else + { + getLog().warn( "Unknown meta: " + name ); + } + + return false; + } +} Propchange: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/document/DocumentModelSink.java ------------------------------------------------------------------------------ svn:keywords = "Author Date Id Revision" Added: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java URL: http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java?rev=780986&view=auto ============================================================================== --- maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java (added) +++ maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java Tue Jun 2 09:43:33 2009 @@ -0,0 +1,196 @@ +package org.apache.maven.doxia.document; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Iterator; +import java.util.List; + +import org.apache.maven.doxia.sink.SinkEventAttributeSet; + +import org.codehaus.plexus.PlexusTestCase; + +/** + * Test DocumentModelSink. + * + * @author ltheussl + * @version $Id$ + * @since 1.1.1 + */ +public class DocumentModelSinkTest + extends PlexusTestCase +{ + /** + * Test of title method, of class DocumentModelSink. + */ + public void testTitle() + { + final DocumentModelSink sink = new DocumentModelSink(); + + sink.title(); + sink.rawText( "" ); + sink.title_(); + + assertNull( sink.getModel().getMeta().getTitle() ); + + sink.title(); + sink.rawText( "Title" ); + sink.title_(); + + assertEquals( "Title", sink.getModel().getMeta().getTitle() ); + } + + /** + * Test of author method, of class DocumentModelSink. + */ + public void testAuthor() + { + final DocumentModelSink sink = new DocumentModelSink(); + final SinkEventAttributeSet email = + new SinkEventAttributeSet( new String[] {"email", "y...@com", "hobby", "breathing"} ); + + sink.author(); + sink.text( "Author" ); + sink.text( "" ); + sink.author_(); + + sink.author(); + sink.text( "" ); + sink.author_(); + + sink.author( email ); + sink.text( "Author with email" ); + sink.rawText( "" ); + sink.author_(); + + assertEquals( "Author, Author with email", sink.getModel().getMeta().getAllAuthorNames() ); + + final List authors = sink.getModel().getMeta().getAuthors(); + assertEquals( 2, authors.size() ); + + for ( final Iterator it = authors.iterator(); it.hasNext(); ) + { + final DocumentAuthor author = (DocumentAuthor) it.next(); + final String name = author.getName(); + assertTrue( "Author".equals( name ) || "Author with email".equals( name ) ); + + if ( "Author with email".equals( name ) ) + { + assertEquals( "y...@com", author.getEmail() ); + } + } + } + + /** + * Test of date method, of class DocumentModelSink. + */ + public void testDate() + { + final DocumentModelSink sink = new DocumentModelSink(); + + sink.date(); + sink.text( "" ); + sink.date_(); + + assertNull( sink.getModel().getMeta().getDate() ); + + sink.date(); + sink.text( "heute" ); + sink.date_(); + + assertNull( sink.getModel().getMeta().getDate() ); + + sink.date(); + sink.text( "1973-02-27" ); + sink.date_(); + + final long feb27 = 99615600000L; + assertEquals( feb27, sink.getModel().getMeta().getDate().getTime() ); + } + + /** + * Test of unknown method, of class DocumentModelSink. + */ + public void testUnknown() + { + final String id = "meta"; + final String name = "name"; + final String content = "content"; + + final SinkEventAttributeSet meta = + new SinkEventAttributeSet( new String[] {name, "generator", content, "me"} ); + + final DocumentModelSink sink = new DocumentModelSink(); + + sink.unknown( id, null, meta ); + assertEquals( "me", sink.getModel().getMeta().getGenerator() ); + + meta.addAttribute( name, "lang" ); + meta.addAttribute( content, "en-us" ); + sink.unknown( id, null, meta ); + assertEquals( "en-us", sink.getModel().getMeta().getLanguage() ); + + meta.addAttribute( name, "language" ); + meta.addAttribute( content, "de-at" ); + sink.unknown( id, null, meta ); + assertEquals( "de-at", sink.getModel().getMeta().getLanguage() ); + + meta.addAttribute( name, "creator" ); + meta.addAttribute( content, "yo" ); + sink.unknown( id, null, meta ); + assertEquals( "yo", sink.getModel().getMeta().getCreator() ); + + meta.addAttribute( name, "creation_date" ); + meta.addAttribute( content, "today" ); + sink.unknown( id, null, meta ); + assertNotNull( sink.getModel().getMeta().getCreationDate() ); + + sink.getModel().getMeta().setCreationDate( null ); + meta.addAttribute( name, "date-creation-yyyymmdd" ); + meta.addAttribute( content, "20000101" ); + sink.unknown( id, null, meta ); + assertNotNull( sink.getModel().getMeta().getCreationDate() ); + + meta.addAttribute( name, "description" ); + meta.addAttribute( content, "hot air" ); + sink.unknown( id, null, meta ); + assertEquals( "hot air", sink.getModel().getMeta().getDescription() ); + + meta.addAttribute( name, "keywords" ); + meta.addAttribute( content, "a, b, c" ); + sink.unknown( id, null, meta ); + assertEquals( 3, sink.getModel().getMeta().getKeyWords().size() ); + assertEquals( "a, b, c", sink.getModel().getMeta().getAllKeyWords() ); + + meta.addAttribute( name, "date" ); + meta.addAttribute( content, "today" ); + sink.unknown( id, null, meta ); + assertNotNull( sink.getModel().getMeta().getDate() ); + + meta.addAttribute( name, "author" ); + meta.addAttribute( content, "me" ); + sink.unknown( id, null, meta ); + assertEquals( "me", sink.getModel().getMeta().getAllAuthorNames() ); + + meta.addAttribute( name, "unknownmeta" ); + meta.addAttribute( content, "unknowncontent" ); + sink.unknown( id, null, meta ); + // unknown meta should log a warning + } +} Propchange: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: maven/doxia/doxia/trunk/doxia-core/src/test/java/org/apache/maven/doxia/document/DocumentModelSinkTest.java ------------------------------------------------------------------------------ svn:keywords = "Author Date Id Revision"