include/oox/core/xmlfilterbase.hxx | 8 - oox/source/core/xmlfilterbase.cxx | 170 +++++++++++++++--------------- sc/Library_scfilt.mk | 1 sc/source/filter/inc/sheetdatacontext.hxx | 11 + sc/source/filter/inc/workbookhelper.hxx | 6 - sc/source/filter/oox/sheetdatacontext.cxx | 6 + sc/source/filter/oox/threadpool.cxx | 166 +++++++++++++++++++++++++++++ sc/source/filter/oox/threadpool.hxx | 53 +++++++++ sc/source/filter/oox/workbookfragment.cxx | 93 +++++++++++++++- sc/source/filter/oox/workbookhelper.cxx | 7 + 10 files changed, 427 insertions(+), 94 deletions(-)
New commits: commit 5b8b567762c6573eaf35ffa6508c0354fba68f34 Author: Michael Stahl <[email protected]> Date: Wed Nov 27 20:50:44 2013 +0100 sc: no such thing as std::vector::find Change-Id: I11bb688a8e0affda9ed78a61d9aba3d244914f8c diff --git a/sc/source/filter/oox/threadpool.cxx b/sc/source/filter/oox/threadpool.cxx index 9de1a14..8349661 100644 --- a/sc/source/filter/oox/threadpool.cxx +++ b/sc/source/filter/oox/threadpool.cxx @@ -9,6 +9,9 @@ #include "threadpool.hxx" +#include <algorithm> + + class ThreadPool::ThreadWorker : public salhelper::Thread { ThreadPool *mpPool; @@ -102,7 +105,8 @@ void ThreadPool::waitUntilWorkersDone() { rtl::Reference< ThreadWorker > xWorker = maWorkers.back(); maWorkers.pop_back(); - assert( maWorkers.find( xWorker ) == maWorkers.end() ); + assert(std::find(maWorkers.begin(), maWorkers.end(), xWorker) + == maWorkers.end()); xWorker->signalNewWork(); aGuard.clear(); { // unlocked commit 4e5ec5476cde6861ead84202b2904bd96f4c307c Author: Michael Meeks <[email protected]> Date: Wed Nov 27 18:11:34 2013 +0000 sc: threaded parsing of the core data inside large XLSX files Enabled in experimental mode only or via SC_IMPORT_THREADS=<N> this allows significant parallelisation of sheet reading. I also implement a simple thread pool to manage that. Conflicts: sc/source/filter/oox/workbookfragment.cxx Change-Id: I66c72211f2699490230e993a374c26b1892eac12 diff --git a/sc/Library_scfilt.mk b/sc/Library_scfilt.mk index 499f873..eb0d5d2 100644 --- a/sc/Library_scfilt.mk +++ b/sc/Library_scfilt.mk @@ -211,6 +211,7 @@ $(eval $(call gb_Library_add_exception_objects,scfilt,\ sc/source/filter/oox/tablebuffer \ sc/source/filter/oox/tablefragment \ sc/source/filter/oox/themebuffer \ + sc/source/filter/oox/threadpool \ sc/source/filter/oox/unitconverter \ sc/source/filter/oox/viewsettings \ sc/source/filter/oox/workbookfragment \ diff --git a/sc/source/filter/inc/sheetdatacontext.hxx b/sc/source/filter/inc/sheetdatacontext.hxx index b492d2a..3f3e377 100644 --- a/sc/source/filter/inc/sheetdatacontext.hxx +++ b/sc/source/filter/inc/sheetdatacontext.hxx @@ -23,6 +23,9 @@ #include "excelhandlers.hxx" #include "richstring.hxx" #include "sheetdatabuffer.hxx" +#include <vcl/svapp.hxx> + +#define MULTI_THREAD_SHEET_PARSING 1 namespace oox { namespace xls { @@ -54,8 +57,16 @@ struct SheetDataContextBase */ class SheetDataContext : public WorksheetContextBase, private SheetDataContextBase { + // If we are doing threaded parsing, this SheetDataContext + // forms the inner loop for bulk data parsing, and for the + // duration of this we can drop the solar mutex. +#if MULTI_THREAD_SHEET_PARSING + SolarMutexReleaser aReleaser; +#endif + public: explicit SheetDataContext( WorksheetFragmentBase& rFragment ); + virtual ~SheetDataContext(); protected: virtual ::oox::core::ContextHandlerRef onCreateContext( sal_Int32 nElement, const AttributeList& rAttribs ); diff --git a/sc/source/filter/oox/sheetdatacontext.cxx b/sc/source/filter/oox/sheetdatacontext.cxx index 5170234..9a0f7df 100644 --- a/sc/source/filter/oox/sheetdatacontext.cxx +++ b/sc/source/filter/oox/sheetdatacontext.cxx @@ -90,6 +90,12 @@ SheetDataContext::SheetDataContext( WorksheetFragmentBase& rFragment ) : mnRow( -1 ), mnCol( -1 ) { + SAL_INFO( "sc.filter", "start safe sheet data context - unlock\n" ); +} + +SheetDataContext::~SheetDataContext() +{ + SAL_INFO( "sc.filter", "end safe sheet data context - relock\n" ); } ContextHandlerRef SheetDataContext::onCreateContext( sal_Int32 nElement, const AttributeList& rAttribs ) diff --git a/sc/source/filter/oox/threadpool.cxx b/sc/source/filter/oox/threadpool.cxx new file mode 100644 index 0000000..9de1a14 --- /dev/null +++ b/sc/source/filter/oox/threadpool.cxx @@ -0,0 +1,162 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include "threadpool.hxx" + +class ThreadPool::ThreadWorker : public salhelper::Thread +{ + ThreadPool *mpPool; + osl::Condition maNewWork; +public: + ThreadWorker( ThreadPool *pPool ) : + salhelper::Thread("sheet-import-thread-pool"), + mpPool( pPool ) {} + + virtual void execute() + { + ThreadTask *pTask; + while ( ( pTask = waitForWork() ) ) + { + pTask->doWork(); + delete pTask; + } + } + + ThreadTask *waitForWork() + { + ThreadTask *pRet = NULL; + + osl::ResettableMutexGuard aGuard( mpPool->maGuard ); + + pRet = mpPool->popWork(); + + while( !pRet ) + { + maNewWork.reset(); + + if( mpPool->mbTerminate ) + break; + + aGuard.clear(); // unlock + + maNewWork.wait(); + + aGuard.reset(); // lock + + pRet = mpPool->popWork(); + } + + return pRet; + } + + // + // Why a condition per worker thread - you may ask. + // + // Unfortunately the Windows synchronisation API that we wrap + // is horribly inadequate cf. + // http://www.cs.wustl.edu/~schmidt/win32-cv-1.html + // The existing osl::Condition API should only ever be used + // between one producer and one consumer thread to avoid the + // lost wakeup problem. + // + void signalNewWork() + { + maNewWork.set(); + } +}; + +ThreadPool::ThreadPool( sal_Int32 nWorkers ) : + mbTerminate( false ) +{ + for( sal_Int32 i = 0; i < nWorkers; i++ ) + maWorkers.push_back( new ThreadWorker( this ) ); + + maTasksEmpty.reset(); + + osl::MutexGuard aGuard( maGuard ); + for( size_t i = 0; i < maWorkers.size(); i++ ) + maWorkers[ i ]->launch(); +} + +ThreadPool::~ThreadPool() +{ + waitUntilWorkersDone(); +} + +/// wait until all the workers have completed and +/// terminate all threads +void ThreadPool::waitUntilWorkersDone() +{ + waitUntilEmpty(); + + osl::ResettableMutexGuard aGuard( maGuard ); + mbTerminate = true; + + while( !maWorkers.empty() ) + { + rtl::Reference< ThreadWorker > xWorker = maWorkers.back(); + maWorkers.pop_back(); + assert( maWorkers.find( xWorker ) == maWorkers.end() ); + xWorker->signalNewWork(); + aGuard.clear(); + { // unlocked + xWorker->join(); + xWorker.clear(); + } + aGuard.reset(); + } +} + +void ThreadPool::pushTask( ThreadTask *pTask ) +{ + osl::MutexGuard aGuard( maGuard ); + maTasks.insert( maTasks.begin(), pTask ); + // horrible beyond belief: + for( size_t i = 0; i < maWorkers.size(); i++ ) + maWorkers[ i ]->signalNewWork(); + maTasksEmpty.reset(); +} + +ThreadTask *ThreadPool::popWork() +{ + if( !maTasks.empty() ) + { + ThreadTask *pTask = maTasks.back(); + maTasks.pop_back(); + return pTask; + } + else + maTasksEmpty.set(); + return NULL; +} + +void ThreadPool::waitUntilEmpty() +{ + osl::ResettableMutexGuard aGuard( maGuard ); + + if( maWorkers.empty() ) + { // no threads at all -> execute the work in-line + ThreadTask *pTask; + while ( ( pTask = popWork() ) ) + { + pTask->doWork(); + delete pTask; + } + mbTerminate = true; + } + else + { + aGuard.clear(); + maTasksEmpty.wait(); + aGuard.reset(); + } + assert( maTasks.empty() ); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/filter/oox/threadpool.hxx b/sc/source/filter/oox/threadpool.hxx new file mode 100644 index 0000000..036534f --- /dev/null +++ b/sc/source/filter/oox/threadpool.hxx @@ -0,0 +1,53 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef SC_THREADPOOL_HXX +#define SC_THREADPOOL_HXX + +#include <sal/config.h> +#include <salhelper/thread.hxx> +#include <osl/mutex.hxx> +#include <osl/conditn.hxx> +#include <rtl/ref.hxx> +#include <vector> + +class ThreadTask +{ +public: + virtual ~ThreadTask() {} + virtual void doWork() = 0; +}; + +/// A very basic thread pool implementation +class ThreadPool +{ +public: + ThreadPool( sal_Int32 nWorkers ); + virtual ~ThreadPool(); + void pushTask( ThreadTask *pTask /* takes ownership */ ); + void waitUntilEmpty(); + void waitUntilWorkersDone(); + +private: + class ThreadWorker; + friend class ThreadWorker; + + ThreadTask *waitForWork( osl::Condition &rNewWork ); + ThreadTask *popWork(); + + osl::Mutex maGuard; + osl::Condition maTasksEmpty; + bool mbTerminate; + std::vector< rtl::Reference< ThreadWorker > > maWorkers; + std::vector< ThreadTask * > maTasks; +}; + +#endif // SC_THREADPOOL_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/filter/oox/workbookfragment.cxx b/sc/source/filter/oox/workbookfragment.cxx index e9884da..e666fa3 100644 --- a/sc/source/filter/oox/workbookfragment.cxx +++ b/sc/source/filter/oox/workbookfragment.cxx @@ -42,11 +42,16 @@ #include "workbooksettings.hxx" #include "worksheetbuffer.hxx" #include "worksheetfragment.hxx" +#include "sheetdatacontext.hxx" +#include "threadpool.hxx" +#include "officecfg/Office/Common.hxx" #include "document.hxx" #include "docsh.hxx" #include "calcconfig.hxx" +#include <vcl/svapp.hxx> + #include <oox/core/fastparser.hxx> #include <comphelper/processfactory.hxx> #include <officecfg/Office/Calc.hxx> @@ -201,187 +206,80 @@ const RecordInfo* WorkbookFragment::getRecordInfos() const namespace { -class WorkerThread; - typedef std::pair<WorksheetGlobalsRef, FragmentHandlerRef> SheetFragmentHandler; typedef std::vector<SheetFragmentHandler> SheetFragmentVector; -typedef rtl::Reference<WorkerThread> WorkerThreadRef; - -struct WorkerThreadData -{ - osl::Mutex maMtx; - std::vector<WorkerThreadRef> maThreads; -}; -struct IdleWorkerThreadData -{ - osl::Mutex maMtx; - osl::Condition maCondAdded; - std::queue<WorkerThread*> maThreads; -}; - -struct -{ - boost::scoped_ptr<WorkerThreadData> mpWorkerThreads; - boost::scoped_ptr<IdleWorkerThreadData> mpIdleThreads; - -} aThreadGlobals; - -enum WorkerAction -{ - None = 0, - TerminateThread, - Work -}; - -class WorkerThread : public salhelper::Thread +class WorkerThread : public ThreadTask { WorkbookFragment& mrWorkbookHandler; - size_t mnID; - FragmentHandlerRef mxHandler; - boost::scoped_ptr<oox::core::FastParser> mxParser; - osl::Mutex maMtxAction; - osl::Condition maCondActionChanged; - WorkerAction meAction; -public: - WorkerThread( WorkbookFragment& rWorkbookHandler, size_t nID ) : - salhelper::Thread("sheet-import-worker-thread"), - mrWorkbookHandler(rWorkbookHandler), - mnID(nID), - mxParser(rWorkbookHandler.getOoxFilter().createParser()), - meAction(None) {} - - virtual void execute() - { - announceIdle(); + rtl::Reference<FragmentHandler> mxHandler; - // Keep looping until the terminate request is set. - for (maCondActionChanged.wait(); true; maCondActionChanged.wait()) - { - osl::MutexGuard aGuard(maMtxAction); - if (!maCondActionChanged.check()) - // Wait again. - continue; - - maCondActionChanged.reset(); - - if (meAction == TerminateThread) - // End the thread. - return; - - if (meAction != Work) - continue; - -#if 0 - // TODO : This still deadlocks in the fast parser code. - mrWorkbookHandler.importOoxFragment(mxHandler, *mxParser); -#else - double val = rand() / static_cast<double>(RAND_MAX); - val *= 1000000; // normalize to 1 second. - val *= 1.5; // inflate it a bit. - usleep(val); // pretend to be working while asleep. -#endif - announceIdle(); - } - } - - void announceIdle() - { - // Set itself idle to receive a new task from the main thread. - osl::MutexGuard aGuard(aThreadGlobals.mpIdleThreads->maMtx); - aThreadGlobals.mpIdleThreads->maThreads.push(this); - aThreadGlobals.mpIdleThreads->maCondAdded.set(); - } - - void terminate() +public: + WorkerThread( WorkbookFragment& rWorkbookHandler, + const rtl::Reference<FragmentHandler>& xHandler ) : + mrWorkbookHandler( rWorkbookHandler ), + mxHandler( xHandler ) { - osl::MutexGuard aGuard(maMtxAction); - meAction = TerminateThread; - maCondActionChanged.set(); } - void assign( const FragmentHandlerRef& rHandler ) + virtual void doWork() { - osl::MutexGuard aGuard(maMtxAction); - mxHandler = rHandler; - meAction = Work; - maCondActionChanged.set(); + // We hold the solar mutex in all threads except for + // the small safe section of the inner loop in + // sheetdatacontext.cxx + SAL_INFO( "sc.filter", "start wait on solar\n" ); + SolarMutexGuard maGuard; + SAL_INFO( "sc.filter", "got solar\n" ); + + boost::scoped_ptr<oox::core::FastParser> xParser( + mrWorkbookHandler.getOoxFilter().createParser() ); + + SAL_INFO( "sc.filter", "start import\n" ); + mrWorkbookHandler.importOoxFragment( mxHandler, *xParser ); + SAL_INFO( "sc.filter", "end import, release solar\n" ); } }; void importSheetFragments( WorkbookFragment& rWorkbookHandler, SheetFragmentVector& rSheets ) { -#if 0 // threaded version - size_t nThreadCount = 3; - if (nThreadCount > rSheets.size()) - nThreadCount = rSheets.size(); + sal_Int32 nThreads = std::min( rSheets.size(), (size_t) 4 /* FIXME: ncpus/2 */ ); - // Create new thread globals. - aThreadGlobals.mpWorkerThreads.reset(new WorkerThreadData); - aThreadGlobals.mpIdleThreads.reset(new IdleWorkerThreadData); + Reference< XComponentContext > xContext = comphelper::getProcessComponentContext(); - SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end(); + // Force threading off unless experimental mode or env. var is set. + if( !officecfg::Office::Common::Misc::ExperimentalMode::get( xContext ) ) + nThreads = 0; - { - // Initialize worker threads. - osl::MutexGuard aGuard(aThreadGlobals.mpWorkerThreads->maMtx); - for (size_t i = 0; i < nThreadCount; ++i) - { - WorkerThreadRef pThread(new WorkerThread(rWorkbookHandler, i)); - aThreadGlobals.mpWorkerThreads->maThreads.push_back(pThread); - pThread->launch(); - } - } + const char *pEnv; + if( ( pEnv = getenv( "SC_IMPORT_THREADS" ) ) ) + nThreads = rtl_str_toInt32( pEnv, 10 ); - for (aThreadGlobals.mpIdleThreads->maCondAdded.wait(); true; aThreadGlobals.mpIdleThreads->maCondAdded.wait()) + if( nThreads != 0 ) { - osl::MutexGuard aGuard(aThreadGlobals.mpIdleThreads->maMtx); - if (!aThreadGlobals.mpIdleThreads->maCondAdded.check()) - // Wait again. - continue; + // test sequential read in this mode + if( nThreads < 0) + nThreads = 0; + ThreadPool aPool( nThreads ); - aThreadGlobals.mpIdleThreads->maCondAdded.reset(); + SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end(); + for( ; it != itEnd; ++it ) + aPool.pushTask( new WorkerThread( rWorkbookHandler, it->second ) ) + ; - // Assign work to all idle threads. - while (!aThreadGlobals.mpIdleThreads->maThreads.empty()) { - if (it == itEnd) - break; - - WorkerThread* p = aThreadGlobals.mpIdleThreads->maThreads.front(); - aThreadGlobals.mpIdleThreads->maThreads.pop(); - p->assign(it->second); - ++it; + // Ideally no-one else but our worker threads can re-acquire that. + // potentially if that causes a problem we might want to extend + // the SolarMutex functionality to allow passing it around. + SolarMutexReleaser aReleaser; + aPool.waitUntilWorkersDone(); } - - if (it == itEnd) - // Finished! Exit the loop. - break; } - - { - // Terminate all worker threads. - osl::MutexGuard aGuard(aThreadGlobals.mpWorkerThreads->maMtx); - for (size_t i = 0, n = aThreadGlobals.mpWorkerThreads->maThreads.size(); i < n; ++i) - { - WorkerThreadRef pWorker = aThreadGlobals.mpWorkerThreads->maThreads[i]; - pWorker->terminate(); - if (pWorker.is()) - pWorker->join(); - } - } - - // Delete all thread globals. - aThreadGlobals.mpWorkerThreads.reset(); - aThreadGlobals.mpIdleThreads.reset(); - -#else // non-threaded version - for( SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end(); it != itEnd; ++it) + else { - // import the sheet fragment - rWorkbookHandler.importOoxFragment(it->second); + SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end(); + for( ; it != itEnd; ++it ) + rWorkbookHandler.importOoxFragment( it->second ); } -#endif } } commit 934941a4cf7c9ee7df69f03e6f0be246499d587f Author: Kohei Yoshida <[email protected]> Date: Fri Nov 22 20:57:40 2013 -0500 Allow worker threads to use their own FastParser instances. To prevent deadlock during threaded sheet stream parsing. It now deadlocks at a different place. Conflicts: oox/source/core/xmlfilterbase.cxx sc/source/filter/oox/workbookfragment.cxx Change-Id: I0ba0f2c9a257e71b0a340ab14e369b06d5fd8829 diff --git a/include/oox/core/xmlfilterbase.hxx b/include/oox/core/xmlfilterbase.hxx index 87234fb..76eb091 100644 --- a/include/oox/core/xmlfilterbase.hxx +++ b/include/oox/core/xmlfilterbase.hxx @@ -56,8 +56,7 @@ namespace oox { namespace core { class FragmentHandler; - -// ============================================================================ +class FastParser; struct TextField { com::sun::star::uno::Reference< com::sun::star::text::XText > xText; @@ -107,7 +106,8 @@ public: @return True, if the fragment could be imported. */ - bool importFragment( const ::rtl::Reference< FragmentHandler >& rxHandler ); + bool importFragment( const rtl::Reference<FragmentHandler>& rxHandler ); + bool importFragment( const rtl::Reference<FragmentHandler>& rxHandler, FastParser& rParser ); /** Imports a fragment into an xml::dom::XDocument. @@ -231,6 +231,8 @@ public: void importDocumentProperties(); + FastParser* createParser() const; + protected: virtual ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > implGetInputStream( utl::MediaDescriptor& rMediaDesc ) const; diff --git a/oox/source/core/xmlfilterbase.cxx b/oox/source/core/xmlfilterbase.cxx index 98c8886..d1ae6a5 100644 --- a/oox/source/core/xmlfilterbase.cxx +++ b/oox/source/core/xmlfilterbase.cxx @@ -74,12 +74,6 @@ using utl::MediaDescriptor; using ::sax_fastparser::FSHelperPtr; using ::sax_fastparser::FastSerializerHelper; - - - - -// ============================================================================ - namespace { bool lclHasSuffix( const OUString& rFragmentPath, const OUString& rSuffix ) @@ -88,9 +82,77 @@ bool lclHasSuffix( const OUString& rFragmentPath, const OUString& rSuffix ) return (nSuffixPos >= 0) && rFragmentPath.match( rSuffix, nSuffixPos ); } -} // namespace +struct NamespaceIds: public rtl::StaticWithInit< + Sequence< beans::Pair< OUString, sal_Int32 > >, + NamespaceIds> +{ + Sequence< beans::Pair< OUString, sal_Int32 > > operator()() + { + static const char* const namespaceURIs[] = { + "http://www.w3.org/XML/1998/namespace", + "http://schemas.openxmlformats.org/package/2006/relationships", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships", + "http://schemas.openxmlformats.org/drawingml/2006/main", + "http://schemas.openxmlformats.org/drawingml/2006/diagram", + "http://schemas.openxmlformats.org/drawingml/2006/chart", + "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing", + "urn:schemas-microsoft-com:vml", + "urn:schemas-microsoft-com:office:office", + "urn:schemas-microsoft-com:office:word", + "urn:schemas-microsoft-com:office:excel", + "urn:schemas-microsoft-com:office:powerpoint", + "http://schemas.microsoft.com/office/2006/activeX", + "http://schemas.openxmlformats.org/spreadsheetml/2006/main", + "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing", + "http://schemas.microsoft.com/office/excel/2006/main", + "http://schemas.openxmlformats.org/presentationml/2006/main", + "http://schemas.openxmlformats.org/markup-compatibility/2006", + "http://schemas.openxmlformats.org/spreadsheetml/2006/main/v2", + "http://schemas.microsoft.com/office/drawing/2008/diagram", + "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main" + }; + + static const sal_Int32 namespaceIds[] = { + NMSP_xml, + NMSP_packageRel, + NMSP_officeRel, + NMSP_dml, + NMSP_dmlDiagram, + NMSP_dmlChart, + NMSP_dmlChartDr, + NMSP_dmlSpreadDr, + NMSP_vml, + NMSP_vmlOffice, + NMSP_vmlWord, + NMSP_vmlExcel, + NMSP_vmlPowerpoint, + NMSP_xls, + NMSP_ppt, + NMSP_ax, + NMSP_xm, + NMSP_mce, + NMSP_mceTest, + NMSP_dsp, + NMSP_xlsExtLst + }; + + Sequence< beans::Pair< OUString, sal_Int32 > > aRet(STATIC_ARRAY_SIZE(namespaceIds)); + for( sal_Int32 i=0; i<aRet.getLength(); ++i ) + aRet[i] = make_Pair( + OUString::createFromAscii(namespaceURIs[i]), + namespaceIds[i]); + return aRet; + } +}; -// ============================================================================ +void registerNamespaces( FastParser& rParser ) +{ + const Sequence< beans::Pair<OUString, sal_Int32> > ids = NamespaceIds::get(); + for (sal_Int32 i = 0; i < ids.getLength(); ++i) + rParser.registerNamespace(ids[i].Second); +} + +} // namespace struct XmlFilterBaseImpl { @@ -105,75 +167,6 @@ struct XmlFilterBaseImpl explicit XmlFilterBaseImpl( const Reference< XComponentContext >& rxContext ) throw( RuntimeException ); }; -// ---------------------------------------------------------------------------- - -namespace -{ - struct NamespaceIds: public rtl::StaticWithInit< - Sequence< beans::Pair< OUString, sal_Int32 > >, - NamespaceIds> - { - Sequence< beans::Pair< OUString, sal_Int32 > > operator()() - { - static const char* const namespaceURIs[] = { - "http://www.w3.org/XML/1998/namespace", - "http://schemas.openxmlformats.org/package/2006/relationships", - "http://schemas.openxmlformats.org/officeDocument/2006/relationships", - "http://schemas.openxmlformats.org/drawingml/2006/main", - "http://schemas.openxmlformats.org/drawingml/2006/diagram", - "http://schemas.openxmlformats.org/drawingml/2006/chart", - "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing", - "urn:schemas-microsoft-com:vml", - "urn:schemas-microsoft-com:office:office", - "urn:schemas-microsoft-com:office:word", - "urn:schemas-microsoft-com:office:excel", - "urn:schemas-microsoft-com:office:powerpoint", - "http://schemas.microsoft.com/office/2006/activeX", - "http://schemas.openxmlformats.org/spreadsheetml/2006/main", - "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing", - "http://schemas.microsoft.com/office/excel/2006/main", - "http://schemas.openxmlformats.org/presentationml/2006/main", - "http://schemas.openxmlformats.org/markup-compatibility/2006", - "http://schemas.openxmlformats.org/spreadsheetml/2006/main/v2", - "http://schemas.microsoft.com/office/drawing/2008/diagram", - "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main" - }; - - static const sal_Int32 namespaceIds[] = { - NMSP_xml, - NMSP_packageRel, - NMSP_officeRel, - NMSP_dml, - NMSP_dmlDiagram, - NMSP_dmlChart, - NMSP_dmlChartDr, - NMSP_dmlSpreadDr, - NMSP_vml, - NMSP_vmlOffice, - NMSP_vmlWord, - NMSP_vmlExcel, - NMSP_vmlPowerpoint, - NMSP_xls, - NMSP_ppt, - NMSP_ax, - NMSP_xm, - NMSP_mce, - NMSP_mceTest, - NMSP_dsp, - NMSP_xlsExtLst - }; - - Sequence< beans::Pair< OUString, sal_Int32 > > aRet(STATIC_ARRAY_SIZE(namespaceIds)); - for( sal_Int32 i=0; i<aRet.getLength(); ++i ) - aRet[i] = make_Pair( - OUString::createFromAscii(namespaceURIs[i]), - namespaceIds[i]); - return aRet; - } - }; -} - -// ---------------------------------------------------------------------------- XmlFilterBaseImpl::XmlFilterBaseImpl( const Reference< XComponentContext >& rxContext ) throw( RuntimeException ) : maFastParser( rxContext ), @@ -181,10 +174,7 @@ XmlFilterBaseImpl::XmlFilterBaseImpl( const Reference< XComponentContext >& rxCo maVmlSuffix( ".vml" ) { // register XML namespaces - const Sequence< beans::Pair< OUString, sal_Int32 > > ids= - NamespaceIds::get(); - for( sal_Int32 i=0; i<ids.getLength(); ++i ) - maFastParser.registerNamespace( ids[i].Second ); + registerNamespaces(maFastParser); } XmlFilterBase::XmlFilterBase( const Reference< XComponentContext >& rxContext ) throw( RuntimeException ) : @@ -220,13 +210,25 @@ void XmlFilterBase::importDocumentProperties() xImporter->importProperties( xDocumentStorage, xPropSupplier->getDocumentProperties() ); } +FastParser* XmlFilterBase::createParser() const +{ + FastParser* pParser = new FastParser(getComponentContext()); + registerNamespaces(*pParser); + return pParser; +} + OUString XmlFilterBase::getFragmentPathFromFirstType( const OUString& rType ) { // importRelations() caches the relations map for subsequence calls return importRelations( OUString() )->getFragmentPathFromFirstType( rType ); } -bool XmlFilterBase::importFragment( const ::rtl::Reference< FragmentHandler >& rxHandler ) +bool XmlFilterBase::importFragment( const rtl::Reference<FragmentHandler>& rxHandler ) +{ + return importFragment(rxHandler, mxImpl->maFastParser); +} + +bool XmlFilterBase::importFragment( const rtl::Reference<FragmentHandler>& rxHandler, FastParser& rParser ) { OSL_ENSURE( rxHandler.is(), "XmlFilterBase::importFragment - missing fragment handler" ); if( !rxHandler.is() ) @@ -280,8 +282,8 @@ bool XmlFilterBase::importFragment( const ::rtl::Reference< FragmentHandler >& r // own try/catch block for showing parser failure assertion with fragment path if( xInStrm.is() ) try { - mxImpl->maFastParser.setDocumentHandler( xDocHandler ); - mxImpl->maFastParser.parseStream( xInStrm, aFragmentPath ); + rParser.setDocumentHandler(xDocHandler); + rParser.parseStream(xInStrm, aFragmentPath); return true; } catch( Exception& ) diff --git a/sc/source/filter/inc/workbookhelper.hxx b/sc/source/filter/inc/workbookhelper.hxx index abafb20..1f472c3 100644 --- a/sc/source/filter/inc/workbookhelper.hxx +++ b/sc/source/filter/inc/workbookhelper.hxx @@ -53,6 +53,7 @@ namespace oox { namespace core { class FilterBase; class FragmentHandler; class XmlFilterBase; + class FastParser; } } class ScDocument; @@ -269,7 +270,10 @@ public: /** Imports a fragment using the passed fragment handler, which contains the full path to the fragment stream. */ - bool importOoxFragment( const ::rtl::Reference< ::oox::core::FragmentHandler >& rxHandler ); + bool importOoxFragment( const rtl::Reference<oox::core::FragmentHandler>& rxHandler ); + + bool importOoxFragment( const rtl::Reference<oox::core::FragmentHandler>& rxHandler, oox::core::FastParser& rParser ); + // BIFF2-BIFF8 specific (MUST NOT be called in OOXML/BIFF12 filter) ------- diff --git a/sc/source/filter/oox/workbookfragment.cxx b/sc/source/filter/oox/workbookfragment.cxx index 485642c..e9884da 100644 --- a/sc/source/filter/oox/workbookfragment.cxx +++ b/sc/source/filter/oox/workbookfragment.cxx @@ -48,6 +48,8 @@ #include "calcconfig.hxx" #include <oox/core/fastparser.hxx> +#include <comphelper/processfactory.hxx> +#include <officecfg/Office/Calc.hxx> #include <salhelper/thread.hxx> #include <osl/conditn.hxx> @@ -197,6 +199,193 @@ const RecordInfo* WorkbookFragment::getRecordInfos() const return spRecInfos; } +namespace { + +class WorkerThread; + +typedef std::pair<WorksheetGlobalsRef, FragmentHandlerRef> SheetFragmentHandler; +typedef std::vector<SheetFragmentHandler> SheetFragmentVector; +typedef rtl::Reference<WorkerThread> WorkerThreadRef; + +struct WorkerThreadData +{ + osl::Mutex maMtx; + std::vector<WorkerThreadRef> maThreads; +}; + +struct IdleWorkerThreadData +{ + osl::Mutex maMtx; + osl::Condition maCondAdded; + std::queue<WorkerThread*> maThreads; +}; + +struct +{ + boost::scoped_ptr<WorkerThreadData> mpWorkerThreads; + boost::scoped_ptr<IdleWorkerThreadData> mpIdleThreads; + +} aThreadGlobals; + +enum WorkerAction +{ + None = 0, + TerminateThread, + Work +}; + +class WorkerThread : public salhelper::Thread +{ + WorkbookFragment& mrWorkbookHandler; + size_t mnID; + FragmentHandlerRef mxHandler; + boost::scoped_ptr<oox::core::FastParser> mxParser; + osl::Mutex maMtxAction; + osl::Condition maCondActionChanged; + WorkerAction meAction; +public: + WorkerThread( WorkbookFragment& rWorkbookHandler, size_t nID ) : + salhelper::Thread("sheet-import-worker-thread"), + mrWorkbookHandler(rWorkbookHandler), + mnID(nID), + mxParser(rWorkbookHandler.getOoxFilter().createParser()), + meAction(None) {} + + virtual void execute() + { + announceIdle(); + + // Keep looping until the terminate request is set. + for (maCondActionChanged.wait(); true; maCondActionChanged.wait()) + { + osl::MutexGuard aGuard(maMtxAction); + if (!maCondActionChanged.check()) + // Wait again. + continue; + + maCondActionChanged.reset(); + + if (meAction == TerminateThread) + // End the thread. + return; + + if (meAction != Work) + continue; + +#if 0 + // TODO : This still deadlocks in the fast parser code. + mrWorkbookHandler.importOoxFragment(mxHandler, *mxParser); +#else + double val = rand() / static_cast<double>(RAND_MAX); + val *= 1000000; // normalize to 1 second. + val *= 1.5; // inflate it a bit. + usleep(val); // pretend to be working while asleep. +#endif + announceIdle(); + } + } + + void announceIdle() + { + // Set itself idle to receive a new task from the main thread. + osl::MutexGuard aGuard(aThreadGlobals.mpIdleThreads->maMtx); + aThreadGlobals.mpIdleThreads->maThreads.push(this); + aThreadGlobals.mpIdleThreads->maCondAdded.set(); + } + + void terminate() + { + osl::MutexGuard aGuard(maMtxAction); + meAction = TerminateThread; + maCondActionChanged.set(); + } + + void assign( const FragmentHandlerRef& rHandler ) + { + osl::MutexGuard aGuard(maMtxAction); + mxHandler = rHandler; + meAction = Work; + maCondActionChanged.set(); + } +}; + +void importSheetFragments( WorkbookFragment& rWorkbookHandler, SheetFragmentVector& rSheets ) +{ +#if 0 // threaded version + size_t nThreadCount = 3; + if (nThreadCount > rSheets.size()) + nThreadCount = rSheets.size(); + + // Create new thread globals. + aThreadGlobals.mpWorkerThreads.reset(new WorkerThreadData); + aThreadGlobals.mpIdleThreads.reset(new IdleWorkerThreadData); + + SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end(); + + { + // Initialize worker threads. + osl::MutexGuard aGuard(aThreadGlobals.mpWorkerThreads->maMtx); + for (size_t i = 0; i < nThreadCount; ++i) + { + WorkerThreadRef pThread(new WorkerThread(rWorkbookHandler, i)); + aThreadGlobals.mpWorkerThreads->maThreads.push_back(pThread); + pThread->launch(); + } + } + + for (aThreadGlobals.mpIdleThreads->maCondAdded.wait(); true; aThreadGlobals.mpIdleThreads->maCondAdded.wait()) + { + osl::MutexGuard aGuard(aThreadGlobals.mpIdleThreads->maMtx); + if (!aThreadGlobals.mpIdleThreads->maCondAdded.check()) + // Wait again. + continue; + + aThreadGlobals.mpIdleThreads->maCondAdded.reset(); + + // Assign work to all idle threads. + while (!aThreadGlobals.mpIdleThreads->maThreads.empty()) + { + if (it == itEnd) + break; + + WorkerThread* p = aThreadGlobals.mpIdleThreads->maThreads.front(); + aThreadGlobals.mpIdleThreads->maThreads.pop(); + p->assign(it->second); + ++it; + } + + if (it == itEnd) + // Finished! Exit the loop. + break; + } + + { + // Terminate all worker threads. + osl::MutexGuard aGuard(aThreadGlobals.mpWorkerThreads->maMtx); + for (size_t i = 0, n = aThreadGlobals.mpWorkerThreads->maThreads.size(); i < n; ++i) + { + WorkerThreadRef pWorker = aThreadGlobals.mpWorkerThreads->maThreads[i]; + pWorker->terminate(); + if (pWorker.is()) + pWorker->join(); + } + } + + // Delete all thread globals. + aThreadGlobals.mpWorkerThreads.reset(); + aThreadGlobals.mpIdleThreads.reset(); + +#else // non-threaded version + for( SheetFragmentVector::iterator it = rSheets.begin(), itEnd = rSheets.end(); it != itEnd; ++it) + { + // import the sheet fragment + rWorkbookHandler.importOoxFragment(it->second); + } +#endif +} + +} + void WorkbookFragment::finalizeImport() { ISegmentProgressBarRef xGlobalSegment = getProgressBar().createSegment( PROGRESS_LENGTH_GLOBALS ); @@ -318,11 +507,7 @@ void WorkbookFragment::finalizeImport() } // load all worksheets - for( SheetFragmentVector::iterator aIt = aSheetFragments.begin(), aEnd = aSheetFragments.end(); aIt != aEnd; ++aIt ) - { - // import the sheet fragment - importOoxFragment( aIt->second ); - } + importSheetFragments(*this, aSheetFragments); for( std::vector<WorksheetHelper*>::iterator aIt = maHelpers.begin(), aEnd = maHelpers.end(); aIt != aEnd; ++aIt ) { diff --git a/sc/source/filter/oox/workbookhelper.cxx b/sc/source/filter/oox/workbookhelper.cxx index 0fca151..32c8bd6 100644 --- a/sc/source/filter/oox/workbookhelper.cxx +++ b/sc/source/filter/oox/workbookhelper.cxx @@ -1012,11 +1012,16 @@ XmlFilterBase& WorkbookHelper::getOoxFilter() const return mrBookGlob.getOoxFilter(); } -bool WorkbookHelper::importOoxFragment( const ::rtl::Reference< FragmentHandler >& rxHandler ) +bool WorkbookHelper::importOoxFragment( const rtl::Reference<FragmentHandler>& rxHandler ) { return getOoxFilter().importFragment( rxHandler ); } +bool WorkbookHelper::importOoxFragment( const rtl::Reference<FragmentHandler>& rxHandler, oox::core::FastParser& rParser ) +{ + return getOoxFilter().importFragment(rxHandler, rParser); +} + // BIFF specific -------------------------------------------------------------- BiffType WorkbookHelper::getBiff() const _______________________________________________ Libreoffice-commits mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
