include/tools/stream.hxx | 2 - svtools/source/svrtf/svparser.cxx | 50 +++----------------------------------- tools/source/stream/stream.cxx | 8 +++--- 3 files changed, 10 insertions(+), 50 deletions(-)
New commits: commit 6c89e5a2d2348c7734300b842065360fa433ab4e Author: Mike Kaganski <[email protected]> AuthorDate: Wed Feb 25 16:37:42 2026 +0500 Commit: Mike Kaganski <[email protected]> CommitDate: Thu Feb 26 16:58:03 2026 +0100 Use SvStream::DetectEncoding in GetNextChar ... instead of direct use of ucsdet_detect. Change-Id: If1c523b3b6fd19c2817dfe2cd09e6aa7148de634 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/200377 Tested-by: Jenkins Reviewed-by: Mike Kaganski <[email protected]> diff --git a/include/tools/stream.hxx b/include/tools/stream.hxx index a1d88674bff6..878c555ff78a 100644 --- a/include/tools/stream.hxx +++ b/include/tools/stream.hxx @@ -332,7 +332,7 @@ public: After the function, the position is after BOM (if any); GetStreamEncoding returns the detected encoding; GetEndian returns the detected endianness (for UTF-16). */ - void DetectEncoding(); + void DetectEncoding(size_t maxBytes = 4096); /** Read a line of Unicode. diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx index 419de30e9d0a..3f6a0f0531d3 100644 --- a/svtools/source/svrtf/svparser.cxx +++ b/svtools/source/svrtf/svparser.cxx @@ -192,51 +192,11 @@ sal_uInt32 SvParser<T>::GetNextChar() // maintained by SaveState/RestoreState. if( bSwitchToUCS2 && 0 == rInput.Tell() ) { - rInput.StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW); - if (rInput.good()) - { - sal_uInt64 nPos = rInput.Tell(); - if (nPos == 2) - eSrcEnc = RTL_TEXTENCODING_UCS2; - else if (nPos == 3) - SetSrcEncoding(RTL_TEXTENCODING_UTF8); - else // Try to detect encoding without BOM - { - std::vector<char> buf(65535); // Arbitrarily chosen 64KiB buffer - const size_t nSize = rInput.ReadBytes(buf.data(), buf.size()); - rInput.Seek(0); - if (nSize > 0) - { - UErrorCode uerr = U_ZERO_ERROR; - UCharsetDetector* ucd = ucsdet_open(&uerr); - ucsdet_setText(ucd, buf.data(), nSize, &uerr); - if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr)) - { - const char* pEncodingName = ucsdet_getName(match, &uerr); - - if (U_SUCCESS(uerr)) - { - if (strcmp("UTF-8", pEncodingName) == 0) - { - SetSrcEncoding(RTL_TEXTENCODING_UTF8); - } - else if (strcmp("UTF-16LE", pEncodingName) == 0) - { - eSrcEnc = RTL_TEXTENCODING_UCS2; - rInput.SetEndian(SvStreamEndian::LITTLE); - } - else if (strcmp("UTF-16BE", pEncodingName) == 0) - { - eSrcEnc = RTL_TEXTENCODING_UCS2; - rInput.SetEndian(SvStreamEndian::BIG); - } - } - } - - ucsdet_close(ucd); - } - } - } + rInput.DetectEncoding(65535); // Arbitrarily chosen 64KiB buffer + if (rInput.GetStreamEncoding() == RTL_TEXTENCODING_UCS2) + eSrcEnc = RTL_TEXTENCODING_UCS2; + else if (rInput.GetStreamEncoding() == RTL_TEXTENCODING_UTF8) + SetSrcEncoding(RTL_TEXTENCODING_UTF8); bSwitchToUCS2 = false; } diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx index 703d0e63b185..96f84c36517b 100644 --- a/tools/source/stream/stream.cxx +++ b/tools/source/stream/stream.cxx @@ -748,7 +748,7 @@ void SvStream::StartReadingUnicodeText(rtl_TextEncoding eReadBomEncoding) Seek(nOldPos); // no BOM, pure data } -void SvStream::DetectEncoding() +void SvStream::DetectEncoding(size_t maxBytes) { static constexpr auto mapEncodings = frozen::make_unordered_map<std::string_view, rtl_TextEncoding>({ @@ -801,8 +801,8 @@ void SvStream::DetectEncoding() } assert(nBomSize == 0); // we are at nOrigPos - char bytes[4096] = { 0 }; - size_t nRead = ReadBytes(bytes, sizeof(bytes)); + auto bytes = std::make_unique<char[]>(maxBytes); + size_t nRead = ReadBytes(bytes.get(), maxBytes); Seek(nOrigPos); ResetError(); @@ -815,7 +815,7 @@ void SvStream::DetectEncoding() return; comphelper::ScopeGuard ucsdet_close_guard([ucd] { ucsdet_close(ucd); }); - ucsdet_setText(ucd, bytes, nRead, &uerr); + ucsdet_setText(ucd, bytes.get(), nRead, &uerr); if (!U_SUCCESS(uerr)) return;
