sw/source/filter/ww8/docxattributeoutput.cxx | 84 ++++++++++++++------------- sw/source/filter/ww8/docxattributeoutput.hxx | 22 ++++++- 2 files changed, 65 insertions(+), 41 deletions(-)
New commits: commit 2f9f82f364517968d216ad5205b0f4c368a3ce89 Author: Justin Luth <[email protected]> AuthorDate: Sat Feb 14 20:16:10 2026 -0500 Commit: Justin Luth <[email protected]> CommitDate: Fri Feb 20 20:44:26 2026 +0100 tdf#170602 docx export: Revise how SdtBlockHelper is cleared No Functional Change intended ultimately, although with this scope of modification some kind of behavioural change is inevitable. This patch lays the groundwork for the rest of this bug report's patchset. SdtBlockHelper so far has been set VERY LATE in the process. All the text and runs have already finished and we EndParagraph before we find out that this paragraph is actually a grabbagged blockSdt content control. But how a grabbagged blockSdt starts is very, umm, vague. So the grabbag is cleared almost as soon as it is filled to prevent it from spilling over into tables/flies/comments etc. In order to make the information available at the beginning of the paragraph (StartParagraph), the logic of how to use and clear this grabbag cache needed to be substantially revised. In order to accomplish this, I needed to keep track of what position was used to fill this SdtBlockHelper, and only initiate a write when the currentPos matches the SdtBlockHelper pos. The idea is to be able to fill the SdtBlockHelper early, and retain the information until the </w:sdt> end marker has been written. 
Change-Id: I30443822f8e09fc083daa4f0492fc4f95ad7391b Reviewed-on: https://gerrit.libreoffice.org/c/core/+/198891 Tested-by: Jenkins Reviewed-by: Justin Luth <[email protected]> diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx index dc31ef6d75a2..b0185240ee74 100644 --- a/sw/source/filter/ww8/docxattributeoutput.cxx +++ b/sw/source/filter/ww8/docxattributeoutput.cxx @@ -784,11 +784,16 @@ void SdtBlockHelper::clearGrabbagValues() m_nTabIndex = 0; } -void SdtBlockHelper::WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSerializer, bool bRunTextIsOn, bool bParagraphHasDrawing) +void SdtBlockHelper::WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSerializer, + const SwPosition* pStartPosition, + bool bRunTextIsOn, bool bParagraphHasDrawing) { if (!m_oSdtPrToken.has_value()) return; // not a full Sdt definition + if (pStartPosition != m_pStartPosition) + return; // Sdt grabbag data is not for the current paragraph - only used for m_aParagraphSdt + // sdt start mark pSerializer->mark(DocxAttributeOutput::Tag_WriteSdtBlock); @@ -838,9 +843,6 @@ void SdtBlockHelper::WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSeriali // write the ending tags after the paragraph m_bStartedSdt = true; - - // clear sdt status - clearGrabbagValues(); } void SdtBlockHelper::WriteExtraParams(const ::sax_fastparser::FSHelperPtr& pSerializer) @@ -899,10 +901,22 @@ void SdtBlockHelper::EndSdtBlock(const ::sax_fastparser::FSHelperPtr& pSerialize pSerializer->endElementNS(XML_w, XML_sdtContent); pSerializer->endElementNS(XML_w, XML_sdt); m_bStartedSdt = false; + m_pStartPosition = nullptr; + clearGrabbagValues(); } -void SdtBlockHelper::GetSdtParamsFromGrabBag(const uno::Sequence<beans::PropertyValue>& aGrabBagSdt) +void SdtBlockHelper::GetSdtParamsFromGrabBag(const uno::Sequence<beans::PropertyValue>& aGrabBagSdt, + const SwPosition* pStartPosition) { + if (m_bStartedSdt) + return; // must not change grabbag cache while <w:sdt> is 
being written + + if (m_pStartPosition && pStartPosition == m_pStartPosition) + return; // m_aParagraphSdt's params have already been cached from the grabbag. + + clearGrabbagValues(); + m_pStartPosition = pStartPosition; // grabbag cache is valid for this paragraph + for (const beans::PropertyValue& aPropertyValue : aGrabBagSdt) { if (aPropertyValue.Name == "ooxml:CT_SdtPr_checkbox") @@ -1281,7 +1295,8 @@ void DocxAttributeOutput::EndParagraph( const ww8::WW8TableNodeInfoInner::Pointe // on export sdt blocks are never nested ATM if (!m_aParagraphSdt.m_bStartedSdt) { - m_aParagraphSdt.WriteSdtBlock(m_pSerializer, m_bRunTextIsOn, m_rExport.SdrExporter().IsParagraphHasDrawing()); + m_aParagraphSdt.WriteSdtBlock(m_pSerializer, m_rExport.m_pCurPam->Start(), + m_bRunTextIsOn, m_rExport.SdrExporter().IsParagraphHasDrawing()); if (m_aParagraphSdt.m_bStartedSdt) { @@ -1291,12 +1306,6 @@ void DocxAttributeOutput::EndParagraph( const ww8::WW8TableNodeInfoInner::Pointe m_rExport.SdrExporter().setParagraphSdtOpen(true); } } - else - { - //These should be written out to the actual Node and not to the anchor. - //Clear them as they will be repopulated when the node is processed. - m_aParagraphSdt.clearGrabbagValues(); - } m_pSerializer->mark(Tag_StartParagraph_2); @@ -2118,15 +2127,7 @@ void DocxAttributeOutput::EndRun(const SwTextNode* pNode, sal_Int32 nPos, sal_In // enclose in a sdt block, if necessary: if one is already started, then don't do it for now // (so on export sdt blocks are never nested ATM) if (!m_aRunSdt.m_bStartedSdt) - { - m_aRunSdt.WriteSdtBlock(m_pSerializer, m_bRunTextIsOn, m_rExport.SdrExporter().IsParagraphHasDrawing()); - } - else - { - //These should be written out to the actual Node and not to the anchor. - //Clear them as they will be repopulated when the node is processed. 
- m_aRunSdt.clearGrabbagValues(); - } + m_aRunSdt.WriteSdtBlock(m_pSerializer, nullptr, m_bRunTextIsOn, m_rExport.SdrExporter().IsParagraphHasDrawing()); m_pSerializer->mergeTopMarks(Tag_StartRun_1); @@ -2581,7 +2582,7 @@ void DocxAttributeOutput::WriteFormDateStart(const OUString& sFullDate, const OU { // There are some extra sdt parameters came from grab bag SdtBlockHelper aSdtBlock; - aSdtBlock.GetSdtParamsFromGrabBag(aGrabBagSdt); + aSdtBlock.GetSdtParamsFromGrabBag(aGrabBagSdt, nullptr); aSdtBlock.WriteExtraParams(m_pSerializer); } @@ -2616,7 +2617,7 @@ void DocxAttributeOutput::WriteSdtPlainText(const OUString & sValue, const uno:: { // There are some extra sdt parameters came from grab bag SdtBlockHelper aSdtBlock; - aSdtBlock.GetSdtParamsFromGrabBag(aGrabBagSdt); + aSdtBlock.GetSdtParamsFromGrabBag(aGrabBagSdt, nullptr); aSdtBlock.WriteExtraParams(m_pSerializer); if (aSdtBlock.m_oSdtPrToken.has_value() && *aSdtBlock.m_oSdtPrToken) @@ -10544,7 +10545,7 @@ void DocxAttributeOutput::ParaGrabBag(const SfxGrabBagItem& rItem) { const uno::Sequence<beans::PropertyValue> aGrabBagSdt = rGrabBagElement.second.get< uno::Sequence<beans::PropertyValue> >(); - m_aParagraphSdt.GetSdtParamsFromGrabBag(aGrabBagSdt); + m_aParagraphSdt.GetSdtParamsFromGrabBag(aGrabBagSdt, m_rExport.m_pCurPam->Start()); } else if (rGrabBagElement.first == "ParaCnfStyle") { @@ -10663,7 +10664,7 @@ void DocxAttributeOutput::CharGrabBag( const SfxGrabBagItem& rItem ) { const uno::Sequence<beans::PropertyValue> aGrabBagSdt = rGrabBagElement.second.get< uno::Sequence<beans::PropertyValue> >(); - m_aRunSdt.GetSdtParamsFromGrabBag(aGrabBagSdt); + m_aRunSdt.GetSdtParamsFromGrabBag(aGrabBagSdt, nullptr); } else SAL_INFO("sw.ww8", "DocxAttributeOutput::CharGrabBag: unhandled grab bag property " << rGrabBagElement.first); diff --git a/sw/source/filter/ww8/docxattributeoutput.hxx b/sw/source/filter/ww8/docxattributeoutput.hxx index b37547aac8f2..fb7e7feda1be 100644 --- 
a/sw/source/filter/ww8/docxattributeoutput.hxx +++ b/sw/source/filter/ww8/docxattributeoutput.hxx @@ -163,12 +163,20 @@ class SdtBlockHelper public: SdtBlockHelper() : m_bStartedSdt(false) + , m_pStartPosition(nullptr) , m_bShowingPlaceHolder(false) , m_nTabIndex(0) {} // m_bStartedSdt tracks whether startElementNS(XML_w, XML_sdt) has been written bool m_bStartedSdt; + // In order to cache the SdtBlockHelper value, some mechanism is needed to check its validity. + // Currently this is needed only for m_aParagraphSdt, so tracking the SwPosition is sufficient. + + // If the SDT has not been started (!m_bStartedSdt) and the text positions do not match, + // then this SdtBlockHelper cache may be cleared and re-populated. + const SwPosition* m_pStartPosition; // only used by m_aParagraphSdt + // m_oSdtPrToken is a key GrabBag value, with a two-fold purpose: // - the absence of m_oSdtPrToken also means that (XML_w, XML_sdt) should not be written // - it describes the type of content control: richText(0), plainText, checkbox, dropdown... @@ -190,13 +198,17 @@ public: void clearGrabbagValues(); - void WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSerializer, bool bRunTextIsOn, bool bParagraphHasDrawing); + // pStartPosition must be nullptr unless this SdtBlockHelper is m_aParagraphSdt. + void WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSerializer, + const SwPosition* pStartPosition, + bool bRunTextIsOn, bool bParagraphHasDrawing); void WriteExtraParams(const ::sax_fastparser::FSHelperPtr& pSerializer); /// Closes a currently open SDT block. void EndSdtBlock(const ::sax_fastparser::FSHelperPtr& pSerializer); - void GetSdtParamsFromGrabBag(const uno::Sequence<beans::PropertyValue>& aGrabBagSdt); + void GetSdtParamsFromGrabBag(const uno::Sequence<beans::PropertyValue>& aGrabBagSdt, + const SwPosition* pStartPosition); }; /// The class that has handlers for various resource types when exporting as DOCX. 
@@ -1085,7 +1097,13 @@ private: // store hardcoded value which was set during import. sal_Int32 m_nParaBeforeSpacing,m_nParaAfterSpacing; + // m_aParagraphSdt contains a grabbagged block content control that needs to be round-tripped + // because in LO it is not a native content control. + // Some content controls can span multiple paragraphs. + // It starts at the paragraph containing the 'SdtPr' grabbag property, + // and ends at the paragraph before the one containing the 'ParaSdtEndBefore' property. SdtBlockHelper m_aParagraphSdt; + // Same as m_aParagraphSdt except it ends on the run before the one containing 'SdtEndBefore' SdtBlockHelper m_aRunSdt; std::vector<std::map<SvxBoxItemLine, css::table::BorderLine2>> m_aTableStyleConfs; commit c06be927b6d43dc286a913a09c7bf865daa943d6 Author: Justin Luth <[email protected]> AuthorDate: Sat Feb 14 20:40:53 2026 -0500 Commit: Justin Luth <[email protected]> CommitDate: Fri Feb 20 20:44:14 2026 +0100 NFC docx export: move useful code snippet into lcl_hasParaSdtEndBefore Change-Id: I59d04beb9a2445da1bc76663f5fe0590348f3e7d Reviewed-on: https://gerrit.libreoffice.org/c/core/+/199394 Tested-by: Jenkins Reviewed-by: Justin Luth <[email protected]> diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx index 841edf06aa31..dc31ef6d75a2 100644 --- a/sw/source/filter/ww8/docxattributeoutput.cxx +++ b/sw/source/filter/ww8/docxattributeoutput.cxx @@ -440,6 +440,21 @@ bool lclHasSolidFillTransformations(const model::ComplexColor& aComplexColor) return idx != transformations.end(); } +// Does this paragraph indicate that a grabbagged 'SdtPr' (in a previous paragraph) should end here? 
+bool lcl_hasParaSdtEndBefore(const SwNode& rNode) +{ + const SwTextNode* pTextNode = rNode.GetTextNode(); + if (!pTextNode || !pTextNode->GetpSwAttrSet()) + return false; + + const SfxGrabBagItem* pParaGrabBag = pTextNode->GetpSwAttrSet()->GetItem(RES_PARATR_GRABBAG); + if (!pParaGrabBag) + return false; + + const std::map<OUString, css::uno::Any>& rMap = pParaGrabBag->GetGrabBag(); + return rMap.contains(u"ParaSdtEndBefore"_ustr); +} + } // end anonymous namespace void DocxAttributeOutput::RTLAndCJKState( bool bIsRTL, sal_uInt16 /*nScript*/ ) @@ -596,20 +611,10 @@ sal_Int32 DocxAttributeOutput::StartParagraph(const ww8::WW8TableNodeInfo::Point // Look up the "sdt end before this paragraph" property early, when it // would normally arrive, it would be too late (would be after the // paragraph start has been written). - bool bEndParaSdt = false; - if (m_aParagraphSdt.m_bStartedSdt) - { - SwTextNode* pTextNode = m_rExport.m_pCurPam->GetPointNode().GetTextNode(); - if (pTextNode && pTextNode->GetpSwAttrSet()) - { - const SfxItemSet* pSet = pTextNode->GetpSwAttrSet(); - if (const SfxGrabBagItem* pParaGrabBag = pSet->GetItem(RES_PARATR_GRABBAG)) - { - const std::map<OUString, css::uno::Any>& rMap = pParaGrabBag->GetGrabBag(); - bEndParaSdt = m_aParagraphSdt.m_bStartedSdt && rMap.contains(u"ParaSdtEndBefore"_ustr); - } - } - } + const bool bEndParaSdt + = m_aParagraphSdt.m_bStartedSdt + && lcl_hasParaSdtEndBefore(m_rExport.m_pCurPam->GetPointNode()); + // TODO also avoid multiline paragraphs in those SDT types for shape text if (bEndParaSdt || (m_aParagraphSdt.m_bStartedSdt && m_bHadSectPr)) {
