sw/qa/extras/ooxmlexport/data/tdf170602_checkbox_bookmarkEnd.docx |binary sw/qa/extras/ooxmlexport/ooxmlexport25.cxx | 11 sw/qa/extras/ooxmlexport/ooxmlexport4.cxx | 7 sw/source/filter/ww8/docxattributeoutput.cxx | 119 +++++++++- sw/source/filter/ww8/docxattributeoutput.hxx | 13 - sw/source/filter/ww8/docxexport.cxx | 8 6 files changed, 149 insertions(+), 9 deletions(-)
New commits: commit 835862b06de1a94bf9233d95b1f3f131e20ed6f6 Author: Justin Luth <[email protected]> AuthorDate: Sat Feb 14 21:27:42 2026 -0500 Commit: Justin Luth <[email protected]> CommitDate: Fri Feb 20 20:45:20 2026 +0100 NFC docx export: bForceRichText helps for code reading Two separate variables were passed to the function, and both were only used together. So combine them into one variable and rename it to be clearer about the intention. This also allows it to be easier to add more reasons why a richText control should be forced. Change-Id: Icf4528657bc7c06de4da3f4afbc68a15f953b60e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/198854 Tested-by: Jenkins Reviewed-by: Justin Luth <[email protected]> diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx index 1461ee55593b..f19770b282e2 100644 --- a/sw/source/filter/ww8/docxattributeoutput.cxx +++ b/sw/source/filter/ww8/docxattributeoutput.cxx @@ -785,8 +785,7 @@ void SdtBlockHelper::clearGrabbagValues() } void SdtBlockHelper::WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSerializer, - const SwPosition* pStartPosition, - bool bRunTextIsOn, bool bParagraphHasDrawing) + const SwPosition* pStartPosition, bool bForceRichText) { if (!m_oSdtPrToken.has_value()) return; // not a full Sdt definition @@ -806,6 +805,7 @@ void SdtBlockHelper::WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSeriali if (m_oSdtPrToken.has_value() && *m_oSdtPrToken && m_pTokenChildren.is()) { + assert((!bForceRichText || m_oSdtPrToken != FSNS(XML_w14, XML_checkbox)) && "This document will probably be reported as corrupt by MS Word"); if (!m_pTokenAttributes.is()) pSerializer->startElement(*m_oSdtPrToken); else @@ -823,7 +823,7 @@ void SdtBlockHelper::WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSeriali pSerializer->endElement(*m_oSdtPrToken); } - else if (m_oSdtPrToken.has_value() && *m_oSdtPrToken && !(bRunTextIsOn && bParagraphHasDrawing)) + else if (m_oSdtPrToken.has_value() && *m_oSdtPrToken && !bForceRichText) { if (!m_pTokenAttributes.is()) pSerializer->singleElement(*m_oSdtPrToken); @@ -1295,8 +1295,9 @@ void DocxAttributeOutput::EndParagraph( const ww8::WW8TableNodeInfoInner::Pointe // on export sdt blocks are never nested ATM if (!m_aParagraphSdt.m_bStartedSdt) { - m_aParagraphSdt.WriteSdtBlock(m_pSerializer, m_rExport.m_pCurPam->Start(), - m_bRunTextIsOn, m_rExport.SdrExporter().IsParagraphHasDrawing()); + const bool bForceRichText + = m_bRunTextIsOn && m_rExport.SdrExporter().IsParagraphHasDrawing(); + m_aParagraphSdt.WriteSdtBlock(m_pSerializer, m_rExport.m_pCurPam->Start(), bForceRichText); if (m_aParagraphSdt.m_bStartedSdt) { @@ -2127,7 +2128,11 @@ void DocxAttributeOutput::EndRun(const SwTextNode* pNode, sal_Int32 nPos, sal_In // enclose in a sdt block, if necessary: if one is already started, then don't do it for now // (so on export sdt blocks are never nested ATM) if (!m_aRunSdt.m_bStartedSdt) - m_aRunSdt.WriteSdtBlock(m_pSerializer, nullptr, m_bRunTextIsOn, m_rExport.SdrExporter().IsParagraphHasDrawing()); + { + const bool bForceRichText + = m_bRunTextIsOn && m_rExport.SdrExporter().IsParagraphHasDrawing(); + m_aRunSdt.WriteSdtBlock(m_pSerializer, nullptr, bForceRichText); + } m_pSerializer->mergeTopMarks(Tag_StartRun_1); diff --git a/sw/source/filter/ww8/docxattributeoutput.hxx b/sw/source/filter/ww8/docxattributeoutput.hxx index be09237a9c60..fa6380be64cf 100644 --- a/sw/source/filter/ww8/docxattributeoutput.hxx +++ b/sw/source/filter/ww8/docxattributeoutput.hxx @@ -200,9 +200,9 @@ public: void clearGrabbagValues(); // pStartPosition must be nullptr unless this SdtBlockHelper is m_aParagraphSdt. + // ForceRichText (by not writing any SdtPrToken) to avoid creating a corrupt document. void WriteSdtBlock(const ::sax_fastparser::FSHelperPtr& pSerializer, - const SwPosition* pStartPosition, - bool bRunTextIsOn, bool bParagraphHasDrawing); + const SwPosition* pStartPosition, bool bForceRichText); void WriteExtraParams(const ::sax_fastparser::FSHelperPtr& pSerializer); /// Closes a currently open SDT block. commit 3957b11722291d7284bc3c30913572fd696fba47 Author: Justin Luth <[email protected]> AuthorDate: Sat Feb 14 20:42:41 2026 -0500 Commit: Justin Luth <[email protected]> CommitDate: Fri Feb 20 20:45:09 2026 +0100 tdf#170602 docx export: write bookmarkEnd after blockSdt Microsoft Word considers a document to be corrupt if certain blockSdt's contain a bookmark end. So, collect them and write them out separately after the Sdt has closed. This is a bit tricky because we output the whole paragraph before we even figure out whether the paragraph has a hidden Sdt personality. pre-emptive unit test: make CppunitTest_sw_ooxmlexport4 CPPUNIT_TEST_NAME=testSimpleSdts make CppunitTest_sw_ooxmlexport25 \ CPPUNIT_TEST_NAME=testTdf170602_checkbox_bookmarkEnd Change-Id: I0ac2cfa209168ace83cb360262166940983073cb Reviewed-on: https://gerrit.libreoffice.org/c/core/+/198710 Tested-by: Jenkins Reviewed-by: Justin Luth <[email protected]> diff --git a/sw/qa/extras/ooxmlexport/data/tdf170602_checkbox_bookmarkEnd.docx b/sw/qa/extras/ooxmlexport/data/tdf170602_checkbox_bookmarkEnd.docx new file mode 100644 index 000000000000..17ad6fbca951 Binary files /dev/null and b/sw/qa/extras/ooxmlexport/data/tdf170602_checkbox_bookmarkEnd.docx differ diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport25.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport25.cxx index fc1fe5637b56..f7ea0e94b227 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport25.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport25.cxx @@ -248,6 +248,17 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf170389_manyTabstops) CPPUNIT_ASSERT_GREATER(sal_Int32(1500), nSize); // nSize > 1500 } +CPPUNIT_TEST_FIXTURE(Test, testTdf170602_checkbox_bookmarkEnd) +{ + createSwDoc("tdf170602_checkbox_bookmarkEnd.docx"); + + save(TestFilter::DOCX); + + xmlDocUniquePtr pXmlDoc = parseExport(u"word/document.xml"_ustr); + // MS Word reports document as corrupt if a plainText blockSdt contains a bookmarkEnd + assertXPath(pXmlDoc, "//w:body/w:tbl/w:tr[2]/w:tc[1]/w:bookmarkEnd", 1); // Tempestades +} + CPPUNIT_TEST_FIXTURE(Test, testInvalidDatetimeInProps) { createSwDoc("invalidDatetimeInProps.fodt"); diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport4.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport4.cxx index 6540e8a523b9..8c85d8c8789a 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport4.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport4.cxx @@ -1121,6 +1121,13 @@ CPPUNIT_TEST_FIXTURE(Test, testSimpleSdts) assertXPath(pXmlDoc, "/w:document/w:body/w:sdt[1]/w:sdtPr/w:picture", 1); assertXPath(pXmlDoc, "/w:document/w:body/w:sdt[2]/w:sdtPr/w:group", 1); assertXPath(pXmlDoc, "/w:document/w:body/w:p[4]/w:sdt/w:sdtPr/w:citation", 1); + + // tdf#170602: bookmarkEnd not allowed inside plainText Sdt + // This is a pre-emptive test - just asserting where it currently is. + // Probably the bookmark belongs before the w:sdt, + // but definitely not after the first </w:r> + // and definitely not bookmarking the entire sdt (not after the picture </w:sdt>) + assertXPath(pXmlDoc, "/w:document/w:body/w:sdt[1]/w:sdtContent/w:p/w:bookmarkEnd", 1); } CPPUNIT_TEST_FIXTURE(Test, testEmbeddedExcelChart) diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx index b0185240ee74..1461ee55593b 100644 --- a/sw/source/filter/ww8/docxattributeoutput.cxx +++ b/sw/source/filter/ww8/docxattributeoutput.cxx @@ -3808,6 +3808,106 @@ void DocxAttributeOutput::GetSdtEndBefore(const SdrObject* pSdrObj) pProp->Value >>= m_bEndCharSdt; } +std::optional<sal_Int32> DocxAttributeOutput::GetGrabBagParaSdtPrToken() +{ + // NOTE: just because w:sdt has started doesn't mean THIS paragraph will be in the sdt. + // It won't be if lcl_hasParaSdtEndBefore (unless it also has 'SdtPr'), + // So don't call this function before StartParagraph has completed EndParaSdtBlock. + + if (m_aParagraphSdt.m_bStartedSdt) + return m_aParagraphSdt.m_oSdtPrToken; + + // update from the grabbag + SwPosition* pStartPosition = m_rExport.m_pCurPam->Start(); + const SwTextNode* pTextNd = pStartPosition->GetNode().GetTextNode(); + if (!pTextNd) + return std::nullopt; + const SfxItemSet* pSet = pTextNd->GetpSwAttrSet(); + if (!pSet) + return std::nullopt; + const SfxGrabBagItem* pParaGrabBag = pSet->GetItem(RES_PARATR_GRABBAG); + if (!pParaGrabBag) + return std::nullopt; + std::map<OUString, css::uno::Any> rMap = pParaGrabBag->GetGrabBag(); + if (!rMap.contains(u"SdtPr"_ustr)) + return std::nullopt; + + const uno::Sequence<beans::PropertyValue> aGrabBagSdt + = rMap[u"SdtPr"_ustr].get<uno::Sequence<beans::PropertyValue>>(); + m_aParagraphSdt.GetSdtParamsFromGrabBag(aGrabBagSdt, pStartPosition); + + return m_aParagraphSdt.m_oSdtPrToken; +} + +// Microsoft Word complains about a corrupt document +// if a bookmarkEnd exists inside most types of blockSdt content controls. +bool DocxAttributeOutput::DoesParaSdtPreventBookmarkEnd(const sal_Int32 nPos) +{ + if (!nPos && !m_aParagraphSdt.m_bStartedSdt) + return false; // don't delay position 0 bookmarkEnds. They should be in front of the Sdt. + + SwPosition* pStartPosition = m_rExport.m_pCurPam->Start(); + const SwTextNode* pTextNd = pStartPosition->GetNode().GetTextNode(); + if (!pTextNd && !m_aParagraphSdt.m_bStartedSdt) + return false; // there cannot be a paragraph blockSdt here. + + SwTextAttr* pContentControl = nullptr; + if (pTextNd) + { + pContentControl = pTextNd->GetTextAttrAt(nPos, RES_TXTATR_CONTENTCONTROL, + sw::GetTextAttrMode::Default); + } + + // Not concerned with real content controls - only grabbagged ones are causing problems. + if (pContentControl) // native content controls are always runSdt + return false; // not dealing with a grabbag blockSdt here. + + // NOTE: just because w:sdt has started doesn't mean THIS paragraph will be in the sdt. + // It won't be if lcl_hasParaSdtEndBefore (unless it also has 'SdtPr'), + // but practically speaking, m_bStartedSdt will be turned off + // before THIS paragraph tries to process any bookmarks - so the complication is just ignored. + bool bParagraphHasGrabBagSdt = m_aParagraphSdt.m_bStartedSdt; + + // check if m_aParagraphSdt is cached for the right paragraph + if (!bParagraphHasGrabBagSdt && m_aParagraphSdt.m_pStartPosition == pStartPosition) + { + // yes - it is already cached. + if (!m_aParagraphSdt.m_oSdtPrToken.has_value()) + return false; // not a full Sdt + + bParagraphHasGrabBagSdt = true; + } + + if (!bParagraphHasGrabBagSdt && pTextNd) + bParagraphHasGrabBagSdt = GetGrabBagParaSdtPrToken().has_value(); + + bool bSdtDoesNotAllowBookmarkEnd = false; + if (bParagraphHasGrabBagSdt) + { + // rich blockSdt are allowed to contain bookmarkEnd: richText(0), Group ... + // plain blockSdt are not allowed to contain bookmarkEnd. + switch (*m_aParagraphSdt.m_oSdtPrToken) + { + case FSNS(XML_w14, XML_checkbox): + case FSNS(XML_w, XML_text): + case FSNS(XML_w, XML_dropDownList): + case FSNS(XML_w, XML_comboBox): + // case FSNS(XML_w, XML_date): + case FSNS(XML_w, XML_picture): + bSdtDoesNotAllowBookmarkEnd = true; + break; + default: + break; + } + } + return bSdtDoesNotAllowBookmarkEnd; +} + +void DocxAttributeOutput::WriteBookmarkEndWithParaSdt(const OUString& rString) +{ + m_aParagraphSdt.m_vBookmarkEnd.push_back(rString); +} + void DocxAttributeOutput::WritePostponedGraphic() { for (const auto & rPostponedDiagram : *m_oPostponedGraphic) @@ -7194,6 +7294,8 @@ void DocxAttributeOutput::EndParaSdtBlock() { // Paragraph-level SDT still open? Close it now. m_aParagraphSdt.EndSdtBlock(m_pSerializer); + + DoWriteBookmarksEnd(m_aParagraphSdt.m_vBookmarkEnd); } } diff --git a/sw/source/filter/ww8/docxattributeoutput.hxx b/sw/source/filter/ww8/docxattributeoutput.hxx index fb7e7feda1be..be09237a9c60 100644 --- a/sw/source/filter/ww8/docxattributeoutput.hxx +++ b/sw/source/filter/ww8/docxattributeoutput.hxx @@ -176,6 +176,7 @@ public: // If the SDT has not beeen started (!m_bStartedSdt) and the text positions do not match, // then this SdtBlockHelper cache may be cleared and re-populated. const SwPosition* m_pStartPosition; // only used by m_aParagraphSdt + std::vector<OUString> m_vBookmarkEnd; // only used by m_aParagraphSdt // m_oSdtPrToken is a key GrabBag value, with a two-fold purpose: // - the absence of m_oSdtPrToken also means that (XML_w, XML_sdt) should not be written @@ -1159,6 +1160,14 @@ public: void SetAlternateContentChoiceOpen( bool bAltContentChoiceOpen ) { m_bAlternateContentChoiceOpen = bAltContentChoiceOpen; } bool IsAlternateContentChoiceOpen( ) const { return m_bAlternateContentChoiceOpen; } void GetSdtEndBefore(const SdrObject* pSdrObj); + + // returns m_aParagraphSdt's effective m_oSdtPrToken for the current node + std::optional<sal_Int32> GetGrabBagParaSdtPrToken(); + + // PlainText-y blockSdt content controls are considered corrupt if they contain a bookmarkEnd + bool DoesParaSdtPreventBookmarkEnd(const sal_Int32 nPos); + void WriteBookmarkEndWithParaSdt(const OUString& rString); + bool IsFirstParagraph() const { return m_bIsFirstParagraph; } /// Stores the table export state to the passed context and resets own state. diff --git a/sw/source/filter/ww8/docxexport.cxx b/sw/source/filter/ww8/docxexport.cxx index 0f07bc346518..9f88fc5309bc 100644 --- a/sw/source/filter/ww8/docxexport.cxx +++ b/sw/source/filter/ww8/docxexport.cxx @@ -172,6 +172,7 @@ void DocxExport::AppendBookmarks( const SwTextNode& rNode, sal_Int32 nCurrentPos IMarkVector aMarks; if ( GetBookmarks( rNode, nCurrentPos, nCurrentPos + nLen, aMarks ) ) { + const bool bSdtGetsBookmarkEnd = m_pAttrOutput->DoesParaSdtPreventBookmarkEnd(nCurrentPos); for ( MarkBase* pMark : aMarks ) { const sal_Int32 nStart = pMark->GetMarkStart().GetContentIndex(); @@ -181,7 +182,12 @@ void DocxExport::AppendBookmarks( const SwTextNode& rNode, sal_Int32 nCurrentPos aStarts.push_back( pMark->GetName().toString() ); if (nEnd == nCurrentPos && rNode == pMark->GetMarkEnd().GetNode()) - aEnds.push_back( pMark->GetName().toString() ); + { + if (bSdtGetsBookmarkEnd) + m_pAttrOutput->WriteBookmarkEndWithParaSdt(pMark->GetName().toString()); + else + aEnds.push_back(pMark->GetName().toString()); + } } }
