sw/source/filter/md/swmd.cxx | 119 ++++++++++++------------------------------- sw/source/filter/md/swmd.hxx | 2 2 files changed, 35 insertions(+), 86 deletions(-)
New commits: commit a488a7ca877541e3f999df9a5d00db077e26ad36 Author: Mike Kaganski <[email protected]> AuthorDate: Sun Feb 22 17:03:39 2026 +0100 Commit: Mike Kaganski <[email protected]> CommitDate: Mon Feb 23 19:10:01 2026 +0100 Optimize SwMarkdownParser::CallParser a bit Change-Id: Ic4e04b5a2ef8b0d3e1573817616c2e94d8339854 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/200069 Tested-by: Jenkins Reviewed-by: Mike Kaganski <[email protected]> diff --git a/sw/source/filter/md/swmd.cxx b/sw/source/filter/md/swmd.cxx index 1b410ab17448..8581c9aafe17 100644 --- a/sw/source/filter/md/swmd.cxx +++ b/sw/source/filter/md/swmd.cxx @@ -748,7 +748,6 @@ SwMarkdownParser::SwMarkdownParser(SwDoc& rD, SwPaM& rCursor, SvStream& rIn, OUS rCursor.DeleteMark(); m_pPam = &rCursor; m_rInput.ResetError(); - m_nFilesize = m_rInput.TellEnd(); m_rInput.Seek(STREAM_SEEK_TO_BEGIN); m_rInput.ResetError(); } @@ -843,102 +842,55 @@ ErrCodeMsg MarkdownReader::Read(SwDoc& rDoc, const OUString& rBaseURL, SwPaM& rP ErrCode SwMarkdownParser::CallParser() { // use utf8 - rtl_TextEncoding eSrcEnc = RTL_TEXTENCODING_DONTKNOW; - m_rInput.StartReadingUnicodeText(eSrcEnc); - if (m_rInput.good()) + m_rInput.StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW); + if (!m_rInput.good()) { - sal_uInt64 nPos = m_rInput.Tell(); //bom size - { - std::vector<char> buf(65535); // Arbitrarily chosen 64KiB buffer - const size_t nSize = m_rInput.ReadBytes(buf.data(), buf.size()); - if (nSize > 0) - { - UErrorCode uerr = U_ZERO_ERROR; - UCharsetDetector* ucd = ucsdet_open(&uerr); - ucsdet_setText(ucd, buf.data(), nSize, &uerr); - if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr)) - { - const char* pEncodingName = ucsdet_getName(match, &uerr); - - if (strcmp("UTF-16LE", pEncodingName) == 0) - { - eSrcEnc = RTL_TEXTENCODING_UCS2; - m_rInput.SetEndian(SvStreamEndian::LITTLE); - } - else if (strcmp("UTF-16BE", pEncodingName) == 0) - { - eSrcEnc = RTL_TEXTENCODING_UCS2; - m_rInput.SetEndian(SvStreamEndian::BIG); - } - else - { - eSrcEnc = rtl_getTextEncodingFromMimeCharset(pEncodingName); - } - } - ucsdet_close(ucd); - } - else - { - return ERRCODE_IO_INVALIDLENGTH; - } - } + return ERRCODE_IO_INVALIDCHAR; + } + rtl_TextEncoding eSrcEnc; + const sal_uInt64 nPos = m_rInput.Tell(); //bom size + if (nPos == 2) + eSrcEnc = RTL_TEXTENCODING_UCS2; + else if (nPos == 3) + eSrcEnc = RTL_TEXTENCODING_UTF8; + else + { + SvStreamEndian eEndian; + SfxObjectShell::DetectCharSet(m_rInput, eSrcEnc, eEndian); if (eSrcEnc == RTL_TEXTENCODING_DONTKNOW) return ERRCODE_IO_INVALIDCHAR; + m_rInput.SetEndian(eEndian); + } - m_rInput.Seek(nPos); - m_rInput.ResetError(); - m_nFilesize -= nPos; - - OUString sData; - OString sUtf8Data; - - if (eSrcEnc == RTL_TEXTENCODING_UCS2) - { - if (m_nFilesize & 1) - return ERRCODE_IO_INVALIDCHAR; - - tools::Long nChars = m_nFilesize / 2; - std::vector<sal_Unicode> aCharData(nChars); + m_rInput.ResetError(); + const sal_uInt64 nFilesize = m_rInput.remainingSize(); + OString sUtf8Data; - for (tools::Long n = 0; n < nChars; n++) - { - m_rInput.ReadUtf16(aCharData[n]); - } + if (eSrcEnc == RTL_TEXTENCODING_UCS2) + { + if (nFilesize & 1) + return ERRCODE_IO_INVALIDCHAR; - sData = OUString(aCharData.data(), nChars); - sUtf8Data = OUStringToOString(sData, RTL_TEXTENCODING_UTF8); - } - else + const sal_uInt64 nChars = nFilesize / 2; + OUString sData = read_uInt16s_ToOUString(m_rInput, nChars); + sUtf8Data = OUStringToOString(sData, RTL_TEXTENCODING_UTF8); + } + else + { + sUtf8Data = read_uInt8s_ToOString(m_rInput, nFilesize); + if (eSrcEnc != RTL_TEXTENCODING_UTF8) { - tools::Long nChars = m_nFilesize; - std::vector<char> aCharData(nChars); - m_rInput.ReadBytes(aCharData.data(), nChars); - sData = OUString(aCharData.data(), nChars, eSrcEnc); + OUString sData = OStringToOUString(sUtf8Data, eSrcEnc); sUtf8Data = OUStringToOString(sData, RTL_TEXTENCODING_UTF8); } - - if (sUtf8Data.getLength()) - { - m_nFilesize = sUtf8Data.getLength(); - m_pArr.reset(new char[m_nFilesize + 1]); - memcpy(m_pArr.get(), sUtf8Data.getStr(), m_nFilesize); - //HACK: At least the implementation of md4c 0.5.2 apparently expects the passed-in - // memory to be null-terminated (it calls e.g. strcspn on it), so pass in an additional - // byte: - m_pArr[m_nFilesize] = 0; - } - else - { - return ERRCODE_IO_INVALIDCHAR; - } } - else + if (sUtf8Data.isEmpty()) { return ERRCODE_IO_INVALIDCHAR; } - ::StartProgress(STR_STATSTR_W4WREAD, 0, m_nFilesize, m_xDoc->GetDocShell()); + ::StartProgress(STR_STATSTR_W4WREAD, 0, sUtf8Data.getLength(), m_xDoc->GetDocShell()); SwTextFormatColl* pColl = m_xDoc->getIDocumentStylePoolAccess().GetTextCollFromPool(SwPoolFormatId::COLL_TEXT); @@ -956,7 +908,7 @@ ErrCode SwMarkdownParser::CallParser() nullptr, nullptr }; - int result = md_parse(m_pArr.get(), m_nFilesize, &parser, static_cast<void*>(this)); + int result = md_parse(sUtf8Data.getStr(), sUtf8Data.getLength(), &parser, this); if (result != 0) { @@ -969,7 +921,6 @@ ErrCode SwMarkdownParser::CallParser() SwMarkdownParser::~SwMarkdownParser() { - m_pArr.reset(); m_pNumRuleInfo.reset(); m_xDoc.clear(); } diff --git a/sw/source/filter/md/swmd.hxx b/sw/source/filter/md/swmd.hxx index 3aebbabf0896..411b668601f6 100644 --- a/sw/source/filter/md/swmd.hxx +++ b/sw/source/filter/md/swmd.hxx @@ -80,9 +80,7 @@ class SwMarkdownParser SwPaM* m_pPam; SvStream& m_rInput; // SfxMedium* m_pMedium; - std::unique_ptr<char[]> m_pArr; std::unique_ptr<SwMdNumRuleInfo> m_pNumRuleInfo; - tools::Long m_nFilesize; MDAttrStack m_aAttrStack;
