sc/Library_sc.mk | 11 +++++ sc/inc/column.hxx | 2 - sc/inc/document.hxx | 2 - sc/inc/formulacell.hxx | 2 - sc/inc/table.hxx | 2 - sc/source/core/data/column2.cxx | 4 +- sc/source/core/data/documen8.cxx | 4 +- sc/source/core/data/formulacell.cxx | 17 ++++++-- sc/source/core/data/table1.cxx | 5 +- sc/source/core/inc/arraysumfunctor.hxx | 51 ------------------------- sc/source/core/tool/arraysumSSE2.cxx | 65 +++++++++++++++++++++++++++++++++ 11 files changed, 101 insertions(+), 64 deletions(-)
New commits: commit 3c2587a152476cbb0ca4a83138a4c34ec8065b32 Author: Dennis Francis <[email protected]> AuthorDate: Mon Oct 7 18:02:04 2019 +0530 Commit: Dennis Francis <[email protected]> CommitDate: Thu Oct 17 08:07:32 2019 +0200 move SSE2sum code to separate cxx file... and compile it with -arch:SSE2 if Windows. This code however gets called only if cpuid::hasSSE2() is true, so this does not cause problems with machines without SSE2 support. Change-Id: Ice23ac71d4c577b8811b08c74a3ca500a94fdc09 Reviewed-on: https://gerrit.libreoffice.org/80847 Tested-by: Jenkins Reviewed-by: Luboš Luňák <[email protected]> diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk index d594ca110f6d..9c057eaf6a6d 100644 --- a/sc/Library_sc.mk +++ b/sc/Library_sc.mk @@ -98,6 +98,17 @@ $(eval $(call gb_Library_use_libraries,sc,\ xo \ )) +ifeq ($(OS),WNT) +$(eval $(call gb_Library_add_exception_objects,sc,\ + sc/source/core/tool/arraysumSSE2, -arch:SSE2 \ +)) + +else +$(eval $(call gb_Library_add_exception_objects,sc,\ + sc/source/core/tool/arraysumSSE2 \ +)) +endif + $(eval $(call gb_Library_add_exception_objects,sc,\ sc/source/core/data/attarray \ sc/source/core/data/attrib \ diff --git a/sc/source/core/inc/arraysumfunctor.hxx b/sc/source/core/inc/arraysumfunctor.hxx index 7ef8a7face05..f1182874b1a1 100644 --- a/sc/source/core/inc/arraysumfunctor.hxx +++ b/sc/source/core/inc/arraysumfunctor.hxx @@ -14,7 +14,6 @@ #include <cstdint> #include <rtl/math.hxx> -#include <tools/simdsupport.hxx> #include <tools/simd.hxx> #include <tools/cpuid.hxx> @@ -87,56 +86,8 @@ public: } private: - double executeSSE2(size_t& i, const double* pCurrent) const - { -#if defined(LO_SSE2_AVAILABLE) - double fSum = 0.0; - size_t nRealSize = mnSize - i; - size_t nUnrolledSize = nRealSize - (nRealSize % 8); - - if (nUnrolledSize > 0) - { - __m128d sum1 = _mm_setzero_pd(); - __m128d sum2 = _mm_setzero_pd(); - __m128d sum3 = _mm_setzero_pd(); - __m128d sum4 = _mm_setzero_pd(); - - for (; i < nUnrolledSize; i += 8) - { - __m128d load1 = _mm_load_pd(pCurrent); - sum1 = _mm_add_pd(sum1, load1); - pCurrent += 2; - - __m128d load2 = _mm_load_pd(pCurrent); - sum2 = _mm_add_pd(sum2, load2); - pCurrent += 2; - - __m128d load3 = _mm_load_pd(pCurrent); - sum3 = _mm_add_pd(sum3, load3); - pCurrent += 2; - - __m128d load4 = _mm_load_pd(pCurrent); - sum4 = _mm_add_pd(sum4, load4); - pCurrent += 2; - } - sum1 = _mm_add_pd(_mm_add_pd(sum1, sum2), _mm_add_pd(sum3, sum4)); - - double temp; - - _mm_storel_pd(&temp, sum1); - fSum += temp; - - _mm_storeh_pd(&temp, sum1); - fSum += temp; - } - return fSum; -#else - (void) i; - (void) pCurrent; - return 0.0; -#endif - } + double executeSSE2(size_t& i, const double* pCurrent) const; double executeUnrolled(size_t& i, const double* pCurrent) const { size_t nRealSize = mnSize - i; diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx new file mode 100644 index 000000000000..894675335834 --- /dev/null +++ b/sc/source/core/tool/arraysumSSE2.cxx @@ -0,0 +1,65 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + */ + +#include <arraysumfunctor.hxx> +#include <tools/simdsupport.hxx> + +namespace sc +{ +double ArraySumFunctor::executeSSE2(size_t& i, const double* pCurrent) const +{ +#if defined(LO_SSE2_AVAILABLE) + double fSum = 0.0; + size_t nRealSize = mnSize - i; + size_t nUnrolledSize = nRealSize - (nRealSize % 8); + + if (nUnrolledSize > 0) + { + __m128d sum1 = _mm_setzero_pd(); + __m128d sum2 = _mm_setzero_pd(); + __m128d sum3 = _mm_setzero_pd(); + __m128d sum4 = _mm_setzero_pd(); + + for (; i < nUnrolledSize; i += 8) + { + __m128d load1 = _mm_load_pd(pCurrent); + sum1 = _mm_add_pd(sum1, load1); + pCurrent += 2; + + __m128d load2 = _mm_load_pd(pCurrent); + sum2 = _mm_add_pd(sum2, load2); + pCurrent += 2; + + __m128d load3 = _mm_load_pd(pCurrent); + sum3 = _mm_add_pd(sum3, load3); + pCurrent += 2; + + __m128d load4 = _mm_load_pd(pCurrent); + sum4 = _mm_add_pd(sum4, load4); + pCurrent += 2; + } + sum1 = _mm_add_pd(_mm_add_pd(sum1, sum2), _mm_add_pd(sum3, sum4)); + + double temp; + + _mm_storel_pd(&temp, sum1); + fSum += temp; + + _mm_storeh_pd(&temp, sum1); + fSum += temp; + } + return fSum; +#else + (void)i; + (void)pCurrent; + return 0.0; +#endif +} +} \ No newline at end of file commit a5b394a4c6ce5aa93654ff6d57fc497bcea93001 Author: Dennis Francis <[email protected]> AuthorDate: Tue Oct 15 07:49:06 2019 +0530 Commit: Dennis Francis <[email protected]> CommitDate: Thu Oct 17 08:07:10 2019 +0200 Reuse pre-allocated ScInterpreter for HandleStuffAfterParallelCalculation Change-Id: Idf10bb214d6d82370512eeb39ba7786dd9bceb38 Reviewed-on: https://gerrit.libreoffice.org/80846 Tested-by: Jenkins Reviewed-by: Luboš Luňák <[email protected]> diff --git a/sc/inc/column.hxx b/sc/inc/column.hxx index 3b7ffbe645e1..18cf3de6231f 100644 --- a/sc/inc/column.hxx +++ b/sc/inc/column.hxx @@ -595,7 +595,7 @@ public: void CalculateInThread( ScInterpreterContext& rContext, SCROW nRow, size_t nLen, size_t nOffset, unsigned nThisThread, unsigned nThreadsTotal ); - void HandleStuffAfterParallelCalculation( SCROW nRow, size_t nLen ); + void HandleStuffAfterParallelCalculation( SCROW nRow, size_t nLen, ScInterpreter* pInterpreter ); void SetNumberFormat( SCROW nRow, sal_uInt32 nNumberFormat ); diff --git a/sc/inc/document.hxx b/sc/inc/document.hxx index 9e19ad228a36..256f105ed7fd 100644 --- a/sc/inc/document.hxx +++ b/sc/inc/document.hxx @@ -2153,7 +2153,7 @@ public: void SC_DLLPUBLIC SetFormulaResults( const ScAddress& rTopPos, const double* pResults, size_t nLen ); const ScDocumentThreadSpecific& CalculateInColumnInThread( ScInterpreterContext& rContext, const ScRange& rCalcRange, unsigned nThisThread, unsigned nThreadsTotal); - void HandleStuffAfterParallelCalculation( SCCOL nColStart, SCCOL nColEnd, SCROW nRow, size_t nLen, SCTAB nTab ); + void HandleStuffAfterParallelCalculation( SCCOL nColStart, SCCOL nColEnd, SCROW nRow, size_t nLen, SCTAB nTab, ScInterpreter* pInterpreter ); /** * Transfer a series of contiguous cell values from specified position to diff --git a/sc/inc/formulacell.hxx b/sc/inc/formulacell.hxx index 65a3a4af7733..9a5af01a3851 100644 --- a/sc/inc/formulacell.hxx +++ b/sc/inc/formulacell.hxx @@ -167,7 +167,7 @@ public: }; void InterpretTail( ScInterpreterContext&, ScInterpretTailParameter ); - void HandleStuffAfterParallelCalculation(); + void HandleStuffAfterParallelCalculation(ScInterpreter* pInterpreter); enum CompareState { NotEqual = 0, EqualInvariant, EqualRelativeRef }; diff --git a/sc/inc/table.hxx b/sc/inc/table.hxx index 78bfa854e33d..9756930b08e1 100644 --- a/sc/inc/table.hxx +++ b/sc/inc/table.hxx @@ -1014,7 +1014,7 @@ public: void CalculateInColumnInThread( ScInterpreterContext& rContext, SCCOL nColStart, SCCOL nColEnd, SCROW nRowStart, SCROW nRowEnd, unsigned nThisThread, unsigned nThreadsTotal); - void HandleStuffAfterParallelCalculation( SCCOL nColStart, SCCOL nColEnd, SCROW nRow, size_t nLen); + void HandleStuffAfterParallelCalculation( SCCOL nColStart, SCCOL nColEnd, SCROW nRow, size_t nLen, ScInterpreter* pInterpreter); /** * Either start all formula cells as listeners unconditionally, or start diff --git a/sc/source/core/data/column2.cxx b/sc/source/core/data/column2.cxx index c0fcc103b676..5e62a01f38a1 100644 --- a/sc/source/core/data/column2.cxx +++ b/sc/source/core/data/column2.cxx @@ -2966,7 +2966,7 @@ void ScColumn::CalculateInThread( ScInterpreterContext& rContext, SCROW nRow, si } } -void ScColumn::HandleStuffAfterParallelCalculation( SCROW nRow, size_t nLen ) +void ScColumn::HandleStuffAfterParallelCalculation( SCROW nRow, size_t nLen, ScInterpreter* pInterpreter ) { sc::CellStoreType::position_type aPos = maCells.position(nRow); sc::CellStoreType::iterator it = aPos.first; @@ -2988,7 +2988,7 @@ void ScColumn::HandleStuffAfterParallelCalculation( SCROW nRow, size_t nLen ) for (size_t i = 0; i < nLen; ++i, ++itCell) { ScFormulaCell& rCell = **itCell; - rCell.HandleStuffAfterParallelCalculation(); + rCell.HandleStuffAfterParallelCalculation(pInterpreter); } } diff --git a/sc/source/core/data/documen8.cxx b/sc/source/core/data/documen8.cxx index 5068656aab39..787d0d7fed85 100644 --- a/sc/source/core/data/documen8.cxx +++ b/sc/source/core/data/documen8.cxx @@ -428,7 +428,7 @@ const ScDocumentThreadSpecific& ScDocument::CalculateInColumnInThread( ScInterpr return maThreadSpecific; } -void ScDocument::HandleStuffAfterParallelCalculation( SCCOL nColStart, SCCOL nColEnd, SCROW nRow, size_t nLen, SCTAB nTab ) +void ScDocument::HandleStuffAfterParallelCalculation( SCCOL nColStart, SCCOL nColEnd, SCROW nRow, size_t nLen, SCTAB nTab, ScInterpreter* pInterpreter ) { assert(!IsThreadedGroupCalcInProgress()); for( const DelayedSetNumberFormat& data : GetNonThreadedContext().maDelayedSetNumberFormat) @@ -439,7 +439,7 @@ void ScDocument::HandleStuffAfterParallelCalculation( SCCOL nColStart, SCCOL nCo if (!pTab) return; - pTab->HandleStuffAfterParallelCalculation(nColStart, nColEnd, nRow, nLen); + pTab->HandleStuffAfterParallelCalculation(nColStart, nColEnd, nRow, nLen, pInterpreter); } void ScDocument::InvalidateTextWidth( const ScAddress* pAdrFrom, const ScAddress* pAdrTo, diff --git a/sc/source/core/data/formulacell.cxx b/sc/source/core/data/formulacell.cxx index d3dfbd18ea1a..9eb36602d319 100644 --- a/sc/source/core/data/formulacell.cxx +++ b/sc/source/core/data/formulacell.cxx @@ -2298,14 +2298,21 @@ void ScFormulaCell::InterpretTail( ScInterpreterContext& rContext, ScInterpretTa } } -void ScFormulaCell::HandleStuffAfterParallelCalculation() +void ScFormulaCell::HandleStuffAfterParallelCalculation(ScInterpreter* pInterpreter) { if( pCode->GetCodeLen() && pDocument ) { if ( !pCode->IsRecalcModeAlways() ) pDocument->RemoveFromFormulaTree( this ); - std::unique_ptr<ScInterpreter> pInterpreter(new ScInterpreter( this, pDocument, pDocument->GetNonThreadedContext(), aPos, *pCode )); + std::unique_ptr<ScInterpreter> pScopedInterpreter; + if (pInterpreter) + pInterpreter->Init(this, aPos, *pCode); + else + { + pScopedInterpreter.reset(new ScInterpreter( this, pDocument, pDocument->GetNonThreadedContext(), aPos, *pCode )); + pInterpreter = pScopedInterpreter.get(); + } switch (pInterpreter->GetVolatileType()) { @@ -4843,6 +4850,7 @@ bool ScFormulaCell::InterpretFormulaGroupThreading(sc::FormulaLogger::GroupScope } } + std::vector<std::unique_ptr<ScInterpreter>> aInterpreters(nThreadCount); { assert(!pDocument->IsThreadedGroupCalcInProgress()); pDocument->SetThreadedGroupCalcInProgress(true); @@ -4853,7 +4861,6 @@ bool ScFormulaCell::InterpretFormulaGroupThreading(sc::FormulaLogger::GroupScope std::shared_ptr<comphelper::ThreadTaskTag> aTag = comphelper::ThreadPool::createThreadTaskTag(); ScThreadedInterpreterContextGetterGuard aContextGetterGuard(nThreadCount, *pDocument, pNonThreadedFormatter); ScInterpreterContext* context = nullptr; - std::vector<std::unique_ptr<ScInterpreter>> aInterpreters(nThreadCount); for (int i = 0; i < nThreadCount; ++i) { @@ -4887,7 +4894,9 @@ bool ScFormulaCell::InterpretFormulaGroupThreading(sc::FormulaLogger::GroupScope ScAddress aStartPos(mxGroup->mpTopCell->aPos); SCROW nSpanLen = nEndOffset - nStartOffset + 1; aStartPos.SetRow(aStartPos.Row() + nStartOffset); - pDocument->HandleStuffAfterParallelCalculation(nColStart, nColEnd, aStartPos.Row(), nSpanLen, aStartPos.Tab()); + // Reuse one of the previously allocated interpreter objects here. + pDocument->HandleStuffAfterParallelCalculation(nColStart, nColEnd, aStartPos.Row(), nSpanLen, + aStartPos.Tab(), aInterpreters[0].get()); return true; } diff --git a/sc/source/core/data/table1.cxx b/sc/source/core/data/table1.cxx index 69e0d7ac9713..84c4210775c4 100644 --- a/sc/source/core/data/table1.cxx +++ b/sc/source/core/data/table1.cxx @@ -2485,12 +2485,13 @@ void ScTable::CalculateInColumnInThread( ScInterpreterContext& rContext, } } -void ScTable::HandleStuffAfterParallelCalculation( SCCOL nColStart, SCCOL nColEnd, SCROW nRow, size_t nLen) +void ScTable::HandleStuffAfterParallelCalculation( SCCOL nColStart, SCCOL nColEnd, SCROW nRow, size_t nLen, + ScInterpreter* pInterpreter) { assert(ValidCol(nColStart) && ValidCol(nColEnd)); for (SCCOL nCurrCol = nColStart; nCurrCol <= nColEnd; ++nCurrCol) - aCol[nCurrCol].HandleStuffAfterParallelCalculation( nRow, nLen ); + aCol[nCurrCol].HandleStuffAfterParallelCalculation( nRow, nLen, pInterpreter ); } #if DUMP_COLUMN_STORAGE _______________________________________________ Libreoffice-commits mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
