include/oox/core/fasttokenhandler.hxx | 7 ++- include/oox/token/tokenmap.hxx | 22 ++++++++++ include/sax/fastattribs.hxx | 14 ++++++ oox/source/core/fasttokenhandler.cxx | 5 ++ oox/source/helper/attributelist.cxx | 13 ++++++ oox/source/token/tokenmap.cxx | 12 ----- sax/source/fastparser/fastparser.cxx | 42 ++++++++++++-------- sax/source/fastparser/fastparser.hxx | 1 sax/source/tools/fastattribs.cxx | 30 +++++--------- writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx | 23 +++++----- writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx | 7 ++- 11 files changed, 117 insertions(+), 59 deletions(-)
New commits: commit f3d1f950aa259ebe8e3b8c65091e5b6e4462e0e3 Author: Michael Meeks <[email protected]> Date: Tue Nov 26 17:26:01 2013 +0000 oox: accelerate common case boolean reading. diff --git a/oox/source/helper/attributelist.cxx b/oox/source/helper/attributelist.cxx index 2efc3a3..8f0707a 100644 --- a/oox/source/helper/attributelist.cxx +++ b/oox/source/helper/attributelist.cxx @@ -201,6 +201,19 @@ OptValue< sal_Int32 > AttributeList::getIntegerHex( sal_Int32 nAttrToken ) const OptValue< bool > AttributeList::getBool( sal_Int32 nAttrToken ) const { + const char *pAttr; + + // catch the common cases as quickly as possible first + bool bHasAttr = getAttribList()->getAsChar( nAttrToken, pAttr ); + if( !bHasAttr ) + return OptValue< bool >(); + if( !strcmp( pAttr, "false" ) ) + return OptValue< bool >( false ); + if( !strcmp( pAttr, "true" ) ) + return OptValue< bool >( true ); + + // now for all the crazy stuff + // boolean attributes may be "t", "f", "true", "false", "on", "off", "1", or "0" switch( getToken( nAttrToken, XML_TOKEN_INVALID ) ) { commit 30a8fbbf1b7bdc849d88445a28e86e03e5dc2166 Author: Michael Meeks <[email protected]> Date: Tue Nov 26 17:25:37 2013 +0000 fastparser: special case xmlns more sensibly. diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx index 4c75e0c..1bfb15e 100644 --- a/sax/source/fastparser/fastparser.cxx +++ b/sax/source/fastparser/fastparser.cxx @@ -949,6 +949,10 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char { assert(awAttributes[i+1]); + if( awAttributes[i][0] != 'x' || + strncmp( awAttributes[i], "xmlns", 5) != 0 ) + continue; + splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen ); if( nPrefixLen ) { commit d7280af9740819971f46a855108334b84778b47d Author: Michael Meeks <[email protected]> Date: Tue Nov 26 16:26:12 2013 +0000 fastparser: Avoid copying all tokens into a sequence. 
diff --git a/include/oox/core/fasttokenhandler.hxx b/include/oox/core/fasttokenhandler.hxx index b79472c..fb4e0e1 100644 --- a/include/oox/core/fasttokenhandler.hxx +++ b/include/oox/core/fasttokenhandler.hxx @@ -23,6 +23,7 @@ #include <com/sun/star/lang/XServiceInfo.hpp> #include <com/sun/star/xml/sax/XFastTokenHandler.hpp> #include <cppuhelper/implbase2.hxx> +#include <sax/fastattribs.hxx> namespace oox { class TokenMap; } @@ -36,7 +37,8 @@ typedef ::cppu::WeakImplHelper2< ::com::sun::star::lang::XServiceInfo, ::com::su /** Wrapper implementing the com.sun.star.xml.sax.XFastTokenHandler API interface that provides access to the tokens generated from the internal token name list. */ -class FastTokenHandler : public FastTokenHandler_BASE +class FastTokenHandler : public FastTokenHandler_BASE, + public sax_fastparser::FastTokenHandlerBase { public: explicit FastTokenHandler(); @@ -53,6 +55,9 @@ public: virtual ::com::sun::star::uno::Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken ) throw (::com::sun::star::uno::RuntimeException); virtual sal_Int32 SAL_CALL getTokenFromUTF8( const ::com::sun::star::uno::Sequence< sal_Int8 >& Identifier ) throw (::com::sun::star::uno::RuntimeException); + // Much faster direct C++ shortcut to the method that matters + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const; + private: const TokenMap& mrTokenMap; ///< Reference to global token map singleton. }; diff --git a/include/oox/token/tokenmap.hxx b/include/oox/token/tokenmap.hxx index 495fa2d..bddc0ce 100644 --- a/include/oox/token/tokenmap.hxx +++ b/include/oox/token/tokenmap.hxx @@ -47,9 +47,29 @@ public: /** Returns the token identifier for the passed UTF8 token name. 
*/ sal_Int32 getTokenFromUtf8( - const ::com::sun::star::uno::Sequence< sal_Int8 >& rUtf8Name ) const; + const ::com::sun::star::uno::Sequence< sal_Int8 >& rUtf8Name ) const + { + return getTokenFromUTF8( reinterpret_cast< const char * >( + rUtf8Name.getConstArray() ), + rUtf8Name.getLength() ); + } + + /** Returns the token identifier for a UTF8 string passed in pToken */ + sal_Int32 getTokenFromUTF8( const char *pToken, sal_Int32 nLength ) const + { + // 50% of OOXML tokens are primarily 1 lower-case character, a-z + if( nLength == 1) + { + sal_Char c = pToken[0]; + if (c >= 'a' && c <= 'z') + return mnAlphaTokens[ c - 'a' ]; + } + return getTokenPerfectHash( pToken, nLength ); + } private: + sal_Int32 getTokenPerfectHash( const char *pToken, sal_Int32 nLength ) const; + struct TokenName { OUString maUniName; diff --git a/include/sax/fastattribs.hxx b/include/sax/fastattribs.hxx index 42b285c..f1f64dd 100644 --- a/include/sax/fastattribs.hxx +++ b/include/sax/fastattribs.hxx @@ -49,6 +49,14 @@ struct UnknownAttribute typedef std::vector< UnknownAttribute > UnknownAttributeList; +/// A native C++ interface to tokenisation +class SAX_DLLPUBLIC FastTokenHandlerBase +{ + public: + virtual ~FastTokenHandlerBase() {} + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const = 0; +}; + /// avoid constantly allocating and freeing sequences. 
class SAX_DLLPUBLIC FastTokenLookup { @@ -58,13 +66,15 @@ public: FastTokenLookup(); sal_Int32 getTokenFromChars( const ::css::uno::Reference< ::css::xml::sax::XFastTokenHandler > &mxTokenHandler, + FastTokenHandlerBase *pTokenHandler, const char *pStr, size_t nLength = 0 ); }; class SAX_DLLPUBLIC FastAttributeList : public ::cppu::WeakImplHelper1< ::com::sun::star::xml::sax::XFastAttributeList > { public: - FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler ); + FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler, + FastTokenHandlerBase *pOptHandlerBase = NULL ); virtual ~FastAttributeList(); void clear(); @@ -100,6 +110,8 @@ private: std::vector< sal_Int32 > maAttributeTokens; UnknownAttributeList maUnknownAttributes; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler; + FastTokenHandlerBase *mpTokenHandler; + FastTokenLookup maTokenLookup; }; diff --git a/oox/source/core/fasttokenhandler.cxx b/oox/source/core/fasttokenhandler.cxx index f57739c..5102408 100644 --- a/oox/source/core/fasttokenhandler.cxx +++ b/oox/source/core/fasttokenhandler.cxx @@ -92,6 +92,11 @@ sal_Int32 FastTokenHandler::getTokenFromUTF8( const Sequence< sal_Int8 >& rIdent return mrTokenMap.getTokenFromUtf8( rIdentifier ); } +sal_Int32 FastTokenHandler::getTokenDirect( const char *pToken, sal_Int32 nLength ) const +{ + return mrTokenMap.getTokenFromUTF8( pToken, nLength ); +} + // ============================================================================ } // namespace core diff --git a/oox/source/token/tokenmap.cxx b/oox/source/token/tokenmap.cxx index ea3e621e..7728b26 100644 --- a/oox/source/token/tokenmap.cxx +++ b/oox/source/token/tokenmap.cxx @@ -113,17 +113,9 @@ Sequence< sal_Int8 > TokenMap::getUtf8TokenName( sal_Int32 nToken ) const return Sequence< sal_Int8 >(); } -sal_Int32 
TokenMap::getTokenFromUtf8( const Sequence< sal_Int8 >& rUtf8Name ) const +sal_Int32 TokenMap::getTokenPerfectHash( const char *pStr, sal_Int32 nLength ) const { - // 50% of OOXML tokens are primarily 1 lower-case character, a-z - if( rUtf8Name.getLength() == 1) - { - sal_Char c = rUtf8Name[0]; - if (c >= 'a' && c <= 'z') - return mnAlphaTokens[ c - 'a' ]; - } - struct xmltoken* pToken = Perfect_Hash::in_word_set( - reinterpret_cast< const char* >( rUtf8Name.getConstArray() ), rUtf8Name.getLength() ); + struct xmltoken* pToken = Perfect_Hash::in_word_set( pStr, nLength ); return pToken ? pToken->nToken : XML_TOKEN_INVALID; } diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx index 415747c..4c75e0c 100644 --- a/sax/source/fastparser/fastparser.cxx +++ b/sax/source/fastparser/fastparser.cxx @@ -190,6 +190,7 @@ OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException) // -------------------------------------------------------------------- ParserData::ParserData() + : mpTokenHandler( NULL ) {} ParserData::~ParserData() @@ -382,7 +383,9 @@ void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNa sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ ) { - return maTokenLookup.getTokenFromChars( getEntity().mxTokenHandler, pToken, nLen ); + return maTokenLookup.getTokenFromChars( getEntity().mxTokenHandler, + getEntity().mpTokenHandler, + pToken, nLen ); } // -------------------------------------------------------------------- @@ -623,9 +626,10 @@ void FastSaxParser::setFastDocumentHandler( const Reference< XFastDocumentHandle maData.mxDocumentHandler = Handler; } -void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& Handler ) throw (RuntimeException) +void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& xHandler ) throw (RuntimeException) { - maData.mxTokenHandler = Handler; + maData.mxTokenHandler = 
xHandler; + maData.mpTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() ); } void SAL_CALL FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException) @@ -918,7 +922,9 @@ void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char if (rEvent.mxAttributes.is()) rEvent.mxAttributes->clear(); else - rEvent.mxAttributes.set( new FastAttributeList( rEntity.mxTokenHandler ) ); + rEvent.mxAttributes.set( + new FastAttributeList( rEntity.mxTokenHandler, + rEntity.mpTokenHandler ) ); sal_Int32 nNameLen, nPrefixLen; const XML_Char *pName; diff --git a/sax/source/fastparser/fastparser.hxx b/sax/source/fastparser/fastparser.hxx index 35deb0c..d2bf1a7 100644 --- a/sax/source/fastparser/fastparser.hxx +++ b/sax/source/fastparser/fastparser.hxx @@ -92,6 +92,7 @@ struct ParserData { ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler > mxDocumentHandler; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler; + FastTokenHandlerBase *mpTokenHandler; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler > mxErrorHandler; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver > mxEntityResolver; ::com::sun::star::lang::Locale maLocale; diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx index ee65cc6..bc63b3e 100644 --- a/sax/source/tools/fastattribs.cxx +++ b/sax/source/tools/fastattribs.cxx @@ -47,8 +47,10 @@ void UnknownAttribute::FillAttribute( Attribute* pAttrib ) const } } -FastAttributeList::FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler ) -: mxTokenHandler( xTokenHandler ) +FastAttributeList::FastAttributeList( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xTokenHandler, + 
sax_fastparser::FastTokenHandlerBase *pTokenHandler) +: mxTokenHandler( xTokenHandler ), + mpTokenHandler( pTokenHandler ) { // random initial size of buffer to store attribute values mnChunkLength = 58; @@ -114,7 +116,7 @@ sal_Int32 FastAttributeList::getValueToken( ::sal_Int32 Token ) throw (SAXExcept { for (size_t i = 0; i < maAttributeTokens.size(); ++i) if (maAttributeTokens[i] == Token) - return maTokenLookup.getTokenFromChars( mxTokenHandler, + return maTokenLookup.getTokenFromChars( mxTokenHandler, mpTokenHandler, mpChunk + maAttributeValues[ i ], AttributeValueLength( i ) ); @@ -125,7 +127,7 @@ sal_Int32 FastAttributeList::getOptionalValueToken( ::sal_Int32 Token, ::sal_Int { for (size_t i = 0; i < maAttributeTokens.size(); ++i) if (maAttributeTokens[i] == Token) - return maTokenLookup.getTokenFromChars( mxTokenHandler, + return maTokenLookup.getTokenFromChars( mxTokenHandler, mpTokenHandler, mpChunk + maAttributeValues[ i ], AttributeValueLength( i ) ); @@ -227,6 +229,7 @@ FastTokenLookup::FastTokenLookup() */ sal_Int32 FastTokenLookup::getTokenFromChars( const ::css::uno::Reference< ::css::xml::sax::XFastTokenHandler > &xTokenHandler, + FastTokenHandlerBase *pTokenHandler, const char *pToken, size_t nLen /* = 0 */ ) { sal_Int32 nRet; @@ -234,23 +237,12 @@ sal_Int32 FastTokenLookup::getTokenFromChars( if( !nLen ) nLen = strlen( pToken ); - if ( static_cast<sal_Int32>(nLen) < mnUtf8BufferSize ) - { - // Get intimate with the underlying sequence cf. sal/types.h - sal_Sequence *pSeq = maUtf8Buffer.get(); - - sal_Int32 nPreRefCount = pSeq->nRefCount; - - pSeq->nElements = nLen; - memcpy( pSeq->elements, pToken, nLen ); - nRet = xTokenHandler->getTokenFromUTF8( maUtf8Buffer ); - - (void)nPreRefCount; // for non-debug mode. 
- assert( pSeq->nRefCount == nPreRefCount ); // callee must not take ref - } + if( pTokenHandler ) + nRet = pTokenHandler->getTokenDirect( pToken, (sal_Int32) nLen ); else { - Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen ); // heap allocate & free + // heap allocate, copy & then free + Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen ); nRet = xTokenHandler->getTokenFromUTF8( aSeq ); } diff --git a/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx b/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx index 096de7f..4cf32f5 100644 --- a/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx +++ b/writerfilter/source/ooxml/OOXMLFastTokenHandler.cxx @@ -108,23 +108,16 @@ css::uno::Sequence< ::sal_Int8 > SAL_CALL OOXMLFastTokenHandler::getUTF8Identifi #endif } -::sal_Int32 SAL_CALL OOXMLFastTokenHandler::getTokenFromUTF8 -(const css::uno::Sequence< ::sal_Int8 > & Identifier) throw (css::uno::RuntimeException) +::sal_Int32 SAL_CALL OOXMLFastTokenHandler::getTokenDirect( const char *pStr, sal_Int32 nLength ) const { - ::sal_Int32 nResult = OOXML_FAST_TOKENS_END; - struct tokenmap::token * pToken = - tokenmap::Perfect_Hash::in_word_set - (reinterpret_cast<const char *>(Identifier.getConstArray()), - Identifier.getLength()); + tokenmap::Perfect_Hash::in_word_set( pStr, nLength ); - if (pToken != NULL) - nResult = pToken->nToken; + sal_Int32 nResult = pToken != NULL ? pToken->nToken : OOXML_FAST_TOKENS_END; #ifdef DEBUG_TOKEN clog << "getTokenFromUTF8: " - << string(reinterpret_cast<const char *> - (Identifier.getConstArray()), Identifier.getLength()) + << string(pStr, nLength) << ", " << nResult << (pToken == NULL ? 
", failed" : "") << endl; #endif @@ -132,6 +125,14 @@ css::uno::Sequence< ::sal_Int8 > SAL_CALL OOXMLFastTokenHandler::getUTF8Identifi return nResult; } +::sal_Int32 SAL_CALL OOXMLFastTokenHandler::getTokenFromUTF8 +(const css::uno::Sequence< ::sal_Int8 > & Identifier) throw (css::uno::RuntimeException) +{ + return getTokenDirect(reinterpret_cast<const char *> + (Identifier.getConstArray()), + Identifier.getLength()); +} + }} /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx b/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx index 120c4e7..ddb5b50 100644 --- a/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx +++ b/writerfilter/source/ooxml/OOXMLFastTokenHandler.hxx @@ -24,6 +24,7 @@ #include "com/sun/star/uno/XComponentContext.hpp" #include "cppuhelper/implbase1.hxx" #include "com/sun/star/xml/sax/XFastTokenHandler.hpp" +#include "sax/fastattribs.hxx" namespace writerfilter { namespace ooxml @@ -31,7 +32,8 @@ namespace ooxml class OOXMLFastTokenHandler: public ::cppu::WeakImplHelper1< - css::xml::sax::XFastTokenHandler> + css::xml::sax::XFastTokenHandler>, + public sax_fastparser::FastTokenHandlerBase { public: explicit OOXMLFastTokenHandler(css::uno::Reference< css::uno::XComponentContext > const & context); @@ -42,6 +44,9 @@ public: virtual css::uno::Sequence< ::sal_Int8 > SAL_CALL getUTF8Identifier(::sal_Int32 Token) throw (css::uno::RuntimeException); virtual ::sal_Int32 SAL_CALL getTokenFromUTF8(const css::uno::Sequence< ::sal_Int8 > & Identifier) throw (css::uno::RuntimeException); + // Much faster direct C++ shortcut to the method that matters + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const; + private: OOXMLFastTokenHandler(OOXMLFastTokenHandler &); // not defined void operator =(OOXMLFastTokenHandler &); // not defined commit 2d681ad020d3cd6beb53a0de1c3057537f31c8df Author: Michael Meeks <[email protected]> Date: Tue Nov 26 13:11:44 2013 +0000 
fastparser: don't waste cycles churning reference counts. diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx index da224d0..415747c 100644 --- a/sax/source/fastparser/fastparser.cxx +++ b/sax/source/fastparser/fastparser.cxx @@ -227,18 +227,20 @@ void Entity::startElement( Event *pEvent ) const sal_Int32& nElementToken = pEvent->mnElementToken; const OUString& aNamespace = pEvent->msNamespace; const OUString& aElementName = pEvent->msElementName; - Reference< XFastContextHandler > xParentContext; + + // Use un-wrapped pointers to avoid significant acquire/release overhead + XFastContextHandler *pParentContext = NULL; if( !maContextStack.empty() ) { - xParentContext = maContextStack.top().mxContext; - if (!xParentContext.is()) + pParentContext = maContextStack.top().mxContext.get(); + if( !pParentContext ) { maContextStack.push( SaxContext(nElementToken, aNamespace, aElementName) ); return; } } - maContextStack.push( SaxContext(nElementToken, aNamespace, aElementName) ); + maContextStack.push( SaxContext( nElementToken, aNamespace, aElementName ) ); try { @@ -246,8 +248,8 @@ void Entity::startElement( Event *pEvent ) Reference< XFastContextHandler > xContext; if( nElementToken == FastToken::DONTKNOW ) { - if( xParentContext.is() ) - xContext = xParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr ); + if( pParentContext ) + xContext = pParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr ); else if( mxDocumentHandler.is() ) xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr ); @@ -258,17 +260,17 @@ void Entity::startElement( Event *pEvent ) } else { - if( xParentContext.is() ) - xContext = xParentContext->createFastChildContext( nElementToken, xAttr ); + if( pParentContext ) + xContext = pParentContext->createFastChildContext( nElementToken, xAttr ); else if( mxDocumentHandler.is() ) xContext = mxDocumentHandler->createFastChildContext( 
nElementToken, xAttr ); if( xContext.is() ) - { xContext->startFastElement( nElementToken, xAttr ); - } } - maContextStack.top().mxContext = xContext; + // swap the reference we own in to avoid referencing thrash. + maContextStack.top().mxContext.set( static_cast<XFastContextHandler *>( xContext.get() ) ); + xContext.set( NULL, UNO_REF_NO_ACQUIRE ); } catch (const Exception& e) { _______________________________________________ Libreoffice-commits mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
