sw/qa/extras/ooxmlexport/data/tdf170602_checkbox_bookmarkEnd.docx |binary sw/qa/extras/ooxmlexport/ooxmlexport25.cxx | 11 sw/qa/extras/ooxmlexport/ooxmlexport4.cxx | 7 sw/source/filter/ww8/docxattributeoutput.cxx | 119 +++++++++- sw/source/filter/ww8/docxattributeoutput.hxx | 13 - sw/source/filter/ww8/docxexport.cxx | 8 6 files changed, 149 insertions(+), 9 deletions(-)
New commits: commit ab0b0c124a7c0895053c135f3f6e28330857c2a2 Author: Justin Luth <[email protected]> AuthorDate: Sat Feb 14 21:27:42 2026 -0500 Commit: Justin Luth <[email protected]> CommitDate: Fri Feb 20 20:43:03 2026 +0100 NFC docx export: bForceRichText helps for code reading Two separate variables were passed to the function, and both were only used together. So combine them into one variable and rename it to be clearer about the intention. This also allows it to be easier to add more reasons why a richText control should be forced. Change-Id: Icf4528657bc7c06de4da3f4afbc68a15f953b60e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/199646 Reviewed-by: Miklos Vajna <[email protected]> Tested-by: Jenkins CollaboraOffice <[email protected]> Reviewed-by: Justin Luth <[email protected]> diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx index c51672d0e6b4..b8f9524fc1ab 100644 --- a/sw/source/filter/ww8/docxattributeoutput.cxx +++ b/sw/source/filter/ww8/docxattributeoutput.cxx @@ -782,8 +782,7 @@ void SdtBlockHelper::clearGrabbagValues() } void SdtBlockHelper::WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSerializer, - const SwPosition* pStartPosition, - bool bRunTextIsOn, bool bParagraphHasDrawing) + const SwPosition* pStartPosition, bool bForceRichText) { if (!m_oSdtPrToken.has_value()) return; // not a full Sdt definition @@ -803,6 +802,7 @@ void SdtBlockHelper::WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSeriali if (m_oSdtPrToken.has_value() && *m_oSdtPrToken && m_pTokenChildren.is()) { + assert((!bForceRichText || m_oSdtPrToken != FSNS(XML_w14, XML_checkbox)) && "This document will probably be reported as corrupt by MS Word"); if (!m_pTokenAttributes.is()) pSerializer->startElement(*m_oSdtPrToken); else @@ -820,7 +820,7 @@ void SdtBlockHelper::WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSeriali pSerializer->endElement(*m_oSdtPrToken); } - else if (m_oSdtPrToken.has_value() && *m_oSdtPrToken && !(bRunTextIsOn && bParagraphHasDrawing)) + else if (m_oSdtPrToken.has_value() && *m_oSdtPrToken && !bForceRichText) { if (!m_pTokenAttributes.is()) pSerializer->singleElement(*m_oSdtPrToken); @@ -1292,8 +1292,9 @@ void DocxAttributeOutput::EndParagraph( const ww8::WW8TableNodeInfoInner::Pointe // on export sdt blocks are never nested ATM if (!m_aParagraphSdt.m_bStartedSdt) { - m_aParagraphSdt.WriteSdtBlock(m_pSerializer, m_rExport.m_pCurPam->Start(), - m_bRunTextIsOn, m_rExport.SdrExporter().IsParagraphHasDrawing()); + const bool bForceRichText + = m_bRunTextIsOn && m_rExport.SdrExporter().IsParagraphHasDrawing(); + m_aParagraphSdt.WriteSdtBlock(m_pSerializer, m_rExport.m_pCurPam->Start(), bForceRichText); if (m_aParagraphSdt.m_bStartedSdt) { @@ -2128,7 +2129,11 @@ void DocxAttributeOutput::EndRun(const SwTextNode* pNode, sal_Int32 nPos, sal_In // enclose in a sdt block, if necessary: if one is already started, then don't do it for now // (so on export sdt blocks are never nested ATM) if (!m_aRunSdt.m_bStartedSdt) - m_aRunSdt.WriteSdtBlock(m_pSerializer, nullptr, m_bRunTextIsOn, m_rExport.SdrExporter().IsParagraphHasDrawing()); + { + const bool bForceRichText + = m_bRunTextIsOn && m_rExport.SdrExporter().IsParagraphHasDrawing(); + m_aRunSdt.WriteSdtBlock(m_pSerializer, nullptr, bForceRichText); + } m_pSerializer->mergeTopMarks(Tag_StartRun_1); diff --git a/sw/source/filter/ww8/docxattributeoutput.hxx b/sw/source/filter/ww8/docxattributeoutput.hxx index be09237a9c60..fa6380be64cf 100644 --- a/sw/source/filter/ww8/docxattributeoutput.hxx +++ b/sw/source/filter/ww8/docxattributeoutput.hxx @@ -200,9 +200,9 @@ public: void clearGrabbagValues(); // pStartPosition must be nullptr unless this SdtBlockHelper is m_aParagraphSdt. + // ForceRichText (by not writing any SdtPrToken) to avoid creating a corrupt document. void WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSerializer, - const SwPosition* pStartPosition, - bool bRunTextIsOn, bool bParagraphHasDrawing); + const SwPosition* pStartPosition, bool bForceRichText); void WriteExtraParams(const ::sax_fastparser::FSHelperPtr& pSerializer); /// Closes a currently open SDT block. commit 36a0211ff326f295e1f2aa054261d9345fd65748 Author: Justin Luth <[email protected]> AuthorDate: Sat Feb 14 20:42:41 2026 -0500 Commit: Justin Luth <[email protected]> CommitDate: Fri Feb 20 20:42:52 2026 +0100 tdf#170602 docx export: write bookmarkEnd after blockSdt Microsoft Word considers a document to be corrupt if certain blockSdt's contain a bookmark end. So, collect them and write them out separately after the Sdt has closed. This is a bit tricky because we output the whole paragraph before we even figure out whether the paragraph has a hidden Sdt personality. pre-emptive unit test: make CppunitTest_sw_ooxmlexport4 CPPUNIT_TEST_NAME=testSimpleSdts make CppunitTest_sw_ooxmlexport25 \ CPPUNIT_TEST_NAME=testTdf170602_checkbox_bookmarkEnd Change-Id: I0ac2cfa209168ace83cb360262166940983073cb Reviewed-on: https://gerrit.libreoffice.org/c/core/+/199645 Reviewed-by: Justin Luth <[email protected]> Tested-by: Jenkins CollaboraOffice <[email protected]> diff --git a/sw/qa/extras/ooxmlexport/data/tdf170602_checkbox_bookmarkEnd.docx b/sw/qa/extras/ooxmlexport/data/tdf170602_checkbox_bookmarkEnd.docx new file mode 100644 index 000000000000..17ad6fbca951 Binary files /dev/null and b/sw/qa/extras/ooxmlexport/data/tdf170602_checkbox_bookmarkEnd.docx differ diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport25.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport25.cxx index ad28a24cc4fa..21f2c1a055b5 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport25.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport25.cxx @@ -241,6 +241,17 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf170389_manyTabstops) CPPUNIT_ASSERT_GREATER(sal_Int32(1500), nSize); // nSize > 1500 } +CPPUNIT_TEST_FIXTURE(Test, testTdf170602_checkbox_bookmarkEnd) +{ + createSwDoc("tdf170602_checkbox_bookmarkEnd.docx"); + + save(TestFilter::DOCX); + + xmlDocUniquePtr pXmlDoc = parseExport(u"word/document.xml"_ustr); + // MS Word reports document as corrupt if a plainText blockSdt contains a bookmarkEnd + assertXPath(pXmlDoc, "//w:body/w:tbl/w:tr[2]/w:tc[1]/w:bookmarkEnd", 1); // Tempestades +} + CPPUNIT_TEST_FIXTURE(Test, testInvalidDatetimeInProps) { createSwDoc("invalidDatetimeInProps.fodt"); diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport4.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport4.cxx index a442f1736cd5..b326c9958829 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport4.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport4.cxx @@ -1137,6 +1137,13 @@ CPPUNIT_TEST_FIXTURE(Test, testSimpleSdts) assertXPath(pXmlDoc, "/w:document/w:body/w:sdt[1]/w:sdtPr/w:picture", 1); assertXPath(pXmlDoc, "/w:document/w:body/w:sdt[2]/w:sdtPr/w:group", 1); assertXPath(pXmlDoc, "/w:document/w:body/w:p[4]/w:sdt/w:sdtPr/w:citation", 1); + + // tdf#170602: bookmarkEnd not allowed inside plainText Sdt + // This is a pre-emptive test - just asserting where it currently is. + // Probably the bookmark belongs before the w:sdt, + // but definitely not after the first </w:r> + // and definitely not bookmarking the entire sdt (not after the picture </w:sdt>) + assertXPath(pXmlDoc, "/w:document/w:body/w:sdt[1]/w:sdtContent/w:p/w:bookmarkEnd", 1); } CPPUNIT_TEST_FIXTURE(Test, testEmbeddedExcelChart) diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx index 50663e6b7d27..c51672d0e6b4 100644 --- a/sw/source/filter/ww8/docxattributeoutput.cxx +++ b/sw/source/filter/ww8/docxattributeoutput.cxx @@ -3813,6 +3813,106 @@ void DocxAttributeOutput::GetSdtEndBefore(const SdrObject* pSdrObj) pProp->Value >>= m_bEndCharSdt; } +std::optional<sal_Int32> DocxAttributeOutput::GetGrabBagParaSdtPrToken() +{ + // NOTE: just because w:sdt has started doesn't mean THIS paragraph will be in the sdt. + // It won't be if lcl_hasParaSdtEndBefore (unless it also has 'SdtPr'), + // So don't call this function before StartParagraph has completed EndParaSdtBlock. + + if (m_aParagraphSdt.m_bStartedSdt) + return m_aParagraphSdt.m_oSdtPrToken; + + // update from the grabbag + SwPosition* pStartPosition = m_rExport.m_pCurPam->Start(); + const SwTextNode* pTextNd = pStartPosition->GetNode().GetTextNode(); + if (!pTextNd) + return std::nullopt; + const SfxItemSet* pSet = pTextNd->GetpSwAttrSet(); + if (!pSet) + return std::nullopt; + const SfxGrabBagItem* pParaGrabBag = pSet->GetItem(RES_PARATR_GRABBAG); + if (!pParaGrabBag) + return std::nullopt; + std::map<OUString, css::uno::Any> rMap = pParaGrabBag->GetGrabBag(); + if (!rMap.contains(u"SdtPr"_ustr)) + return std::nullopt; + + const uno::Sequence<beans::PropertyValue> aGrabBagSdt + = rMap[u"SdtPr"_ustr].get<uno::Sequence<beans::PropertyValue>>(); + m_aParagraphSdt.GetSdtParamsFromGrabBag(aGrabBagSdt, pStartPosition); + + return m_aParagraphSdt.m_oSdtPrToken; +} + +// Microsoft Word complains about a corrupt document +// if a bookmarkEnd exists inside most types of blockSdt content controls. +bool DocxAttributeOutput::DoesParaSdtPreventBookmarkEnd(const sal_Int32 nPos) +{ + if (!nPos && !m_aParagraphSdt.m_bStartedSdt) + return false; // don't delay position 0 bookmarkEnds. They should be in front of the Sdt. + + SwPosition* pStartPosition = m_rExport.m_pCurPam->Start(); + const SwTextNode* pTextNd = pStartPosition->GetNode().GetTextNode(); + if (!pTextNd && !m_aParagraphSdt.m_bStartedSdt) + return false; // there cannot be a paragraph blockSdt here. + + SwTextAttr* pContentControl = nullptr; + if (pTextNd) + { + pContentControl = pTextNd->GetTextAttrAt(nPos, RES_TXTATR_CONTENTCONTROL, + sw::GetTextAttrMode::Default); + } + + // Not concerned with real content controls - only grabbagged ones are causing problems. + if (pContentControl) // native content controls are always runSdt + return false; // not dealing with a grabbag blockSdt here. + + // NOTE: just because w:sdt has started doesn't mean THIS paragraph will be in the sdt. + // It won't be if lcl_hasParaSdtEndBefore (unless it also has 'SdtPr'), + // but practically speaking, m_bStartedSdt will be turned off + // before THIS paragraph tries to process any bookmarks - so the complication is just ignored. + bool bParagraphHasGrabBagSdt = m_aParagraphSdt.m_bStartedSdt; + + // check if m_aParagraphSdt is cached for the right paragraph + if (!bParagraphHasGrabBagSdt && m_aParagraphSdt.m_pStartPosition == pStartPosition) + { + // yes - it is already cached. + if (!m_aParagraphSdt.m_oSdtPrToken.has_value()) + return false; // not a full Sdt + + bParagraphHasGrabBagSdt = true; + } + + if (!bParagraphHasGrabBagSdt && pTextNd) + bParagraphHasGrabBagSdt = GetGrabBagParaSdtPrToken().has_value(); + + bool bSdtDoesNotAllowBookmarkEnd = false; + if (bParagraphHasGrabBagSdt) + { + // rich blockSdt are allowed to contain bookmarkEnd: richText(0), Group ... + // plain blockSdt are not allowed to contain bookmarkEnd. + switch (*m_aParagraphSdt.m_oSdtPrToken) + { + case FSNS(XML_w14, XML_checkbox): + case FSNS(XML_w, XML_text): + case FSNS(XML_w, XML_dropDownList): + case FSNS(XML_w, XML_comboBox): + // case FSNS(XML_w, XML_date): + case FSNS(XML_w, XML_picture): + bSdtDoesNotAllowBookmarkEnd = true; + break; + default: + break; + } + } + return bSdtDoesNotAllowBookmarkEnd; +} + +void DocxAttributeOutput::WriteBookmarkEndWithParaSdt(const OUString& rString) +{ + m_aParagraphSdt.m_vBookmarkEnd.push_back(rString); +} + void DocxAttributeOutput::WritePostponedGraphic() { for (const auto & rPostponedDiagram : *m_oPostponedGraphic) @@ -7199,6 +7299,8 @@ void DocxAttributeOutput::EndParaSdtBlock() { // Paragraph-level SDT still open? Close it now. m_aParagraphSdt.EndSdtBlock(m_pSerializer); + + DoWriteBookmarksEnd(m_aParagraphSdt.m_vBookmarkEnd); } } diff --git a/sw/source/filter/ww8/docxattributeoutput.hxx b/sw/source/filter/ww8/docxattributeoutput.hxx index fb7e7feda1be..be09237a9c60 100644 --- a/sw/source/filter/ww8/docxattributeoutput.hxx +++ b/sw/source/filter/ww8/docxattributeoutput.hxx @@ -176,6 +176,7 @@ public: // If the SDT has not beeen started (!m_bStartedSdt) and the text positions do not match, // then this SdtBlockHelper cache may be cleared and re-populated. const SwPosition* m_pStartPosition; // only used by m_aParagraphSdt + std::vector<OUString> m_vBookmarkEnd; // only used by m_aParagraphSdt // m_oSdtPrToken is a key GrabBag value, with a two-fold purpose: // - the absence of m_oSdtPrToken also means that (XML_w, XML_sdt) should not be written @@ -1159,6 +1160,14 @@ public: void SetAlternateContentChoiceOpen( bool bAltContentChoiceOpen ) { m_bAlternateContentChoiceOpen = bAltContentChoiceOpen; } bool IsAlternateContentChoiceOpen( ) const { return m_bAlternateContentChoiceOpen; } void GetSdtEndBefore(const SdrObject* pSdrObj); + + // returns m_aParagraphSdt's effective m_oSdtPrToken for the current node + std::optional<sal_Int32> GetGrabBagParaSdtPrToken(); + + // PlainText-y blockSdt content controls are considered corrupt if they contain a bookmarkEnd + bool DoesParaSdtPreventBookmarkEnd(const sal_Int32 nPos); + void WriteBookmarkEndWithParaSdt(const OUString& rString); + bool IsFirstParagraph() const { return m_bIsFirstParagraph; } /// Stores the table export state to the passed context and resets own state. diff --git a/sw/source/filter/ww8/docxexport.cxx b/sw/source/filter/ww8/docxexport.cxx index d06f58e228f3..8a98aca8b94f 100644 --- a/sw/source/filter/ww8/docxexport.cxx +++ b/sw/source/filter/ww8/docxexport.cxx @@ -172,6 +172,7 @@ void DocxExport::AppendBookmarks( const SwTextNode& rNode, sal_Int32 nCurrentPos IMarkVector aMarks; if ( GetBookmarks( rNode, nCurrentPos, nCurrentPos + nLen, aMarks ) ) { + const bool bSdtGetsBookmarkEnd = m_pAttrOutput->DoesParaSdtPreventBookmarkEnd(nCurrentPos); for ( MarkBase* pMark : aMarks ) { const sal_Int32 nStart = pMark->GetMarkStart().GetContentIndex(); @@ -181,7 +182,12 @@ void DocxExport::AppendBookmarks( const SwTextNode& rNode, sal_Int32 nCurrentPos aStarts.push_back( pMark->GetName().toString() ); if (nEnd == nCurrentPos && rNode == pMark->GetMarkEnd().GetNode()) - aEnds.push_back( pMark->GetName().toString() ); + { + if (bSdtGetsBookmarkEnd) + m_pAttrOutput->WriteBookmarkEndWithParaSdt(pMark->GetName().toString()); + else + aEnds.push_back(pMark->GetName().toString()); + } } }
