sw/qa/extras/htmlexport/htmlexport.cxx | 38 +++++++++ sw/source/filter/html/htmlatr.cxx | 30 +++++++ sw/source/filter/html/wrthtml.cxx | 128 ++++++++++++++++++++++++++------- sw/source/filter/html/wrthtml.hxx | 5 + 4 files changed, 175 insertions(+), 26 deletions(-)
New commits: commit 5fa9481a203edf21bd7fe2373fdc51312bb357a4 Author: Miklos Vajna <[email protected]> AuthorDate: Tue Feb 15 17:07:45 2022 +0100 Commit: Miklos Vajna <[email protected]> CommitDate: Wed Feb 16 09:02:04 2022 +0100 sw HTML export: add a new LeadingTabWidth option This is a simple way to not lose indentation done with tabs (e.g. source code) during the HTML export. A more complex way would be to ask the layout for the tab portion width, ask VCL what's the size of an nbsp glyph and then act accordingly, which is not done here. (cherry picked from commit 505f5db522f8406715f455d8007d014073a99097) Change-Id: I2a5c0512e9e5541e55e10f29952679bf05d63f1b diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx index eff7bda22037..5064abc3f7a5 100644 --- a/sw/qa/extras/htmlexport/htmlexport.cxx +++ b/sw/qa/extras/htmlexport/htmlexport.cxx @@ -2059,6 +2059,44 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testTrailingLineBreak) CPPUNIT_ASSERT_EQUAL(OUString("test\n"), aActual); } +CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testLeadingTab) +{ + // Given a document with leading tabs: + loadURL("private:factory/swriter", nullptr); + SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get()); + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + SwWrtShell* pWrtShell = pDoc->GetDocShell()->GetWrtShell(); + pWrtShell->Insert("\t first"); + pWrtShell->SplitNode(); + pWrtShell->Insert("\t\t second"); + pWrtShell->SplitNode(); + pWrtShell->Insert("thi \t rd"); + + // When exporting to HTML, using LeadingTabWidth=2: + uno::Reference<frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY); + uno::Sequence<beans::PropertyValue> aStoreProperties = { + comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")), + comphelper::makePropertyValue("FilterOptions", OUString("xhtmlns=reqif-xhtml")), + comphelper::makePropertyValue("LeadingTabWidth", static_cast<sal_Int32>(2)), + }; + 
xStorable->storeToURL(maTempFile.GetURL(), aStoreProperties); + + // Then make sure that leading tabs are replaced with 2 nbsps: + SvMemoryStream aStream; + HtmlExportTest::wrapFragment(maTempFile, aStream); + xmlDocUniquePtr pXmlDoc = parseXmlStream(&aStream); + CPPUNIT_ASSERT(pDoc); + // Without the accompanying fix in place, this test would have failed with: + // - Expected: <nbsp><nbsp><space>first + // - Actual : <tab><space>first + // i.e. the leading tab was not replaced by 2 nbsps. + assertXPathContent(pXmlDoc, "//reqif-xhtml:p[1]", u"\xa0\xa0 first"); + // Test a leading tab that is not at the start of the paragraph: + assertXPathContent(pXmlDoc, "//reqif-xhtml:p[2]", u"\xa0\xa0\xa0\xa0 second"); + // Test a tab which is not leading: + assertXPathContent(pXmlDoc, "//reqif-xhtml:p[3]", u"thi \t rd"); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sw/source/filter/html/htmlatr.cxx b/sw/source/filter/html/htmlatr.cxx index 80f1b7d2ddea..8f8d0c1fa080 100644 --- a/sw/source/filter/html/htmlatr.cxx +++ b/sw/source/filter/html/htmlatr.cxx @@ -2361,6 +2361,8 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) { HTMLOutContext aContext( rHTMLWrt.m_eDestEnc ); + // Tabs are leading till there is a non-tab since the start of the paragraph. + bool bLeadingTab = true; for( ; nStrPos < nEnd; nStrPos++ ) { // output the frames that are anchored to the current position @@ -2494,7 +2496,33 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) rHTMLWrt.OutPointFieldmarks(aMarkPos); } else - HTMLOutFuncs::Out_Char( rWrt.Strm(), c, aContext, &rHTMLWrt.m_aNonConvertableCharacters ); + { + bool bConsumed = false; + if (c == '\t') + { + if (bLeadingTab && rHTMLWrt.m_nLeadingTabWidth.has_value()) + { + // Consume a tab if it's leading and we know the number of NBSPs to + // be used as a replacement. 
+ for (sal_Int32 i = 0; i < *rHTMLWrt.m_nLeadingTabWidth; ++i) + { + rWrt.Strm().WriteCharPtr(" "); + } + bConsumed = true; + } + } + else + { + // Not a tab -> later tabs are no longer leading. + bLeadingTab = false; + } + + if (!bConsumed) + { + HTMLOutFuncs::Out_Char(rWrt.Strm(), c, aContext, + &rHTMLWrt.m_aNonConvertableCharacters); + } + } if (!rHTMLWrt.mbReqIF) { diff --git a/sw/source/filter/html/wrthtml.cxx b/sw/source/filter/html/wrthtml.cxx index 3506ffa4707c..b665bd7bb503 100644 --- a/sw/source/filter/html/wrthtml.cxx +++ b/sw/source/filter/html/wrthtml.cxx @@ -320,6 +320,14 @@ void SwHTMLWriter::SetupFilterFromPropertyValues( // XHTML namespace implies XHTML. mbXHTML = true; } + + it = aStoreMap.find("LeadingTabWidth"); + if (it != aStoreMap.end()) + { + sal_Int32 nVal{}; + it->second >>= nVal; + m_nLeadingTabWidth.emplace(nVal); + } } ErrCode SwHTMLWriter::WriteStream() diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx index 217d571719f9..cc23a82b22d3 100644 --- a/sw/source/filter/html/wrthtml.hxx +++ b/sw/source/filter/html/wrthtml.hxx @@ -415,6 +415,9 @@ public: /// DPI used when exporting a vector shape as a bitmap. std::optional<sal_Int32> m_nShapeDPI; + /// If set, replace leading tabs with this many non-breaking spaces. + std::optional<sal_Int32> m_nLeadingTabWidth; + /// Construct an instance of SwHTMLWriter and optionally give it /// the filter options directly, which can also be set via SetupFilterOptions(). 
explicit SwHTMLWriter( const OUString& rBaseURL, const OUString& rFilterOptions = "" ); commit 97fee18693461e5b0272f2826860269483ca547e Author: Miklos Vajna <[email protected]> AuthorDate: Tue Feb 15 10:00:33 2022 +0100 Commit: Miklos Vajna <[email protected]> CommitDate: Wed Feb 16 08:43:34 2022 +0100 sw HTML export: extract a SetupFilterFromPropertyValues() From the two SetupFilterOptions() overloads, which meant that some options were only possible to set from FilterOptions, and others were only possible to set via UNO property values. (cherry picked from commit 22d09d65c0e61cac1fa27af6a04a23e16f97c907) Conflicts: sw/source/filter/html/wrthtml.cxx Change-Id: Ib7cdbb082e93b9ff105afe72f295994733b4525a diff --git a/sw/source/filter/html/wrthtml.cxx b/sw/source/filter/html/wrthtml.cxx index 080598036c9c..3506ffa4707c 100644 --- a/sw/source/filter/html/wrthtml.cxx +++ b/sw/source/filter/html/wrthtml.cxx @@ -194,7 +194,53 @@ void SwHTMLWriter::SetupFilterOptions(SfxMedium& rMedium) const OUString sFilterOptions = static_cast<const SfxStringItem*>(pItem)->GetValue(); SetupFilterOptions(sFilterOptions); - comphelper::SequenceAsHashMap aStoreMap(rMedium.GetArgs()); + SetupFilterFromPropertyValues(rMedium.GetArgs()); +} + +void SwHTMLWriter::SetupFilterOptions(const OUString& rFilterOptions) +{ + comphelper::SequenceAsHashMap aStoreMap; + if (rFilterOptions.indexOf("SkipImages") >= 0) + { + aStoreMap["SkipImages"] <<= true; + } + else if (rFilterOptions.indexOf("SkipHeaderFooter") >= 0) + { + aStoreMap["SkipHeaderFooter"] <<= true; + } + else if (rFilterOptions.indexOf("EmbedImages") >= 0) + { + aStoreMap["EmbedImages"] <<= true; + } + + // this option can be "on" together with any of above + if (rFilterOptions.indexOf("NoLineLimit") >= 0) + { + aStoreMap["NoLineLimit"] <<= true; + } + + const uno::Sequence<OUString> aOptionSeq + = comphelper::string::convertCommaSeparated(rFilterOptions); + const OUString aXhtmlNsKey("xhtmlns="); + for (const auto& rOption : aOptionSeq) 
+ { + if (rOption == "XHTML") + { + aStoreMap["XHTML"] <<= true; + } + else if (rOption.startsWith(aXhtmlNsKey)) + { + aStoreMap["XhtmlNs"] <<= rOption.copy(aXhtmlNsKey.getLength()); + } + } + + SetupFilterFromPropertyValues(aStoreMap.getAsConstPropertyValueList()); +} + +void SwHTMLWriter::SetupFilterFromPropertyValues( + const css::uno::Sequence<css::beans::PropertyValue>& rPropertyValues) +{ + comphelper::SequenceAsHashMap aStoreMap(rPropertyValues); auto it = aStoreMap.find("RTFOLEMimeType"); if (it != aStoreMap.end()) { @@ -214,41 +260,65 @@ void SwHTMLWriter::SetupFilterOptions(SfxMedium& rMedium) it->second >>= nVal; m_nShapeDPI.emplace(nVal); } -} -void SwHTMLWriter::SetupFilterOptions(const OUString& rFilterOptions) -{ - if (rFilterOptions == "SkipImages") + it = aStoreMap.find("SkipImages"); + if (it != aStoreMap.end()) { - mbSkipImages = true; + bool bVal{}; + it->second >>= bVal; + mbSkipImages = bVal; } - else if (rFilterOptions == "SkipHeaderFooter") + + it = aStoreMap.find("SkipHeaderFooter"); + if (it != aStoreMap.end()) { - mbSkipHeaderFooter = true; + bool bVal{}; + it->second >>= bVal; + mbSkipHeaderFooter = bVal; } - else if (rFilterOptions == "EmbedImages") + + it = aStoreMap.find("EmbedImages"); + if (it != aStoreMap.end()) { - mbEmbedImages = true; + bool bVal{}; + it->second >>= bVal; + mbEmbedImages = bVal; } - const uno::Sequence<OUString> aOptionSeq = comphelper::string::convertCommaSeparated(rFilterOptions); - const OUString aXhtmlNsKey("xhtmlns="); - for (const auto& rOption : aOptionSeq) + it = aStoreMap.find("NoLineLimit"); + if (it != aStoreMap.end()) { - if (rOption == "XHTML") - mbXHTML = true; - else if (rOption.startsWith(aXhtmlNsKey)) + bool bVal{}; + it->second >>= bVal; + if (bVal) { - maNamespace = rOption.copy(aXhtmlNsKey.getLength()).toUtf8(); - if (maNamespace == "reqif-xhtml") - { - mbReqIF = true; - // XHTML is always just a fragment inside ReqIF. - mbSkipHeaderFooter = true; - } - // XHTML namespace implies XHTML. 
- mbXHTML = true; + m_nWhishLineLen = -1; + } + } + + it = aStoreMap.find("XHTML"); + if (it != aStoreMap.end()) + { + bool bVal{}; + it->second >>= bVal; + mbXHTML = bVal; + } + + it = aStoreMap.find("XhtmlNs"); + if (it != aStoreMap.end()) + { + OUString aVal; + it->second >>= aVal; + + maNamespace = aVal.toUtf8(); + if (maNamespace == "reqif-xhtml") + { + mbReqIF = true; + // XHTML is always just a fragment inside ReqIF. + mbSkipHeaderFooter = true; } + // XHTML namespace implies XHTML. + mbXHTML = true; } } diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx index f53b7e659e9d..217d571719f9 100644 --- a/sw/source/filter/html/wrthtml.hxx +++ b/sw/source/filter/html/wrthtml.hxx @@ -274,6 +274,8 @@ class SW_DLLPUBLIC SwHTMLWriter : public Writer protected: ErrCode WriteStream() override; void SetupFilterOptions(SfxMedium& rMedium) override; + void SetupFilterFromPropertyValues( + const css::uno::Sequence<css::beans::PropertyValue>& rPropertyValues); public: std::vector<OUString> m_aImgMapNames; // written image maps
