include/tools/stream.hxx          |    2 -
 svtools/source/svrtf/svparser.cxx |   50 +++-----------------------------------
 tools/source/stream/stream.cxx    |    8 +++---
 3 files changed, 10 insertions(+), 50 deletions(-)

New commits:
commit 6c89e5a2d2348c7734300b842065360fa433ab4e
Author:     Mike Kaganski <[email protected]>
AuthorDate: Wed Feb 25 16:37:42 2026 +0500
Commit:     Mike Kaganski <[email protected]>
CommitDate: Thu Feb 26 16:58:03 2026 +0100

    Use SvStream::DetectEncoding in GetNextChar
    
    ... instead of direct use of ucsdet_detect.
    
    Change-Id: If1c523b3b6fd19c2817dfe2cd09e6aa7148de634
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/200377
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <[email protected]>

diff --git a/include/tools/stream.hxx b/include/tools/stream.hxx
index a1d88674bff6..878c555ff78a 100644
--- a/include/tools/stream.hxx
+++ b/include/tools/stream.hxx
@@ -332,7 +332,7 @@ public:
         After the function, the position is after BOM (if any); GetStreamEncoding
         returns the detected encoding; GetEndian returns the detected endianness
         (for UTF-16). */
-    void DetectEncoding();
+    void DetectEncoding(size_t maxBytes = 4096);
 
     /** Read a line of Unicode.
 
diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx
index 419de30e9d0a..3f6a0f0531d3 100644
--- a/svtools/source/svrtf/svparser.cxx
+++ b/svtools/source/svrtf/svparser.cxx
@@ -192,51 +192,11 @@ sal_uInt32 SvParser<T>::GetNextChar()
     // maintained by SaveState/RestoreState.
     if( bSwitchToUCS2 && 0 == rInput.Tell() )
     {
-        rInput.StartReadingUnicodeText(RTL_TEXTENCODING_DONTKNOW);
-        if (rInput.good())
-        {
-            sal_uInt64 nPos = rInput.Tell();
-            if (nPos == 2)
-                eSrcEnc = RTL_TEXTENCODING_UCS2;
-            else if (nPos == 3)
-                SetSrcEncoding(RTL_TEXTENCODING_UTF8);
-            else // Try to detect encoding without BOM
-            {
-                std::vector<char> buf(65535); // Arbitrarily chosen 64KiB buffer
-                const size_t nSize = rInput.ReadBytes(buf.data(), buf.size());
-                rInput.Seek(0);
-                if (nSize > 0)
-                {
-                    UErrorCode uerr = U_ZERO_ERROR;
-                    UCharsetDetector* ucd = ucsdet_open(&uerr);
-                    ucsdet_setText(ucd, buf.data(), nSize, &uerr);
-                    if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr))
-                    {
-                        const char* pEncodingName = ucsdet_getName(match, &uerr);
-
-                        if (U_SUCCESS(uerr))
-                        {
-                            if (strcmp("UTF-8", pEncodingName) == 0)
-                            {
-                                SetSrcEncoding(RTL_TEXTENCODING_UTF8);
-                            }
-                            else if (strcmp("UTF-16LE", pEncodingName) == 0)
-                            {
-                                eSrcEnc = RTL_TEXTENCODING_UCS2;
-                                rInput.SetEndian(SvStreamEndian::LITTLE);
-                            }
-                            else if (strcmp("UTF-16BE", pEncodingName) == 0)
-                            {
-                                eSrcEnc = RTL_TEXTENCODING_UCS2;
-                                rInput.SetEndian(SvStreamEndian::BIG);
-                            }
-                        }
-                    }
-
-                    ucsdet_close(ucd);
-                }
-            }
-        }
+        rInput.DetectEncoding(65535); // Arbitrarily chosen 64KiB buffer
+        if (rInput.GetStreamEncoding() == RTL_TEXTENCODING_UCS2)
+            eSrcEnc = RTL_TEXTENCODING_UCS2;
+        else if (rInput.GetStreamEncoding() == RTL_TEXTENCODING_UTF8)
+            SetSrcEncoding(RTL_TEXTENCODING_UTF8);
         bSwitchToUCS2 = false;
     }
 
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index 703d0e63b185..96f84c36517b 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -748,7 +748,7 @@ void SvStream::StartReadingUnicodeText(rtl_TextEncoding eReadBomEncoding)
         Seek(nOldPos);      // no BOM, pure data
 }
 
-void SvStream::DetectEncoding()
+void SvStream::DetectEncoding(size_t maxBytes)
 {
     static constexpr auto mapEncodings
         = frozen::make_unordered_map<std::string_view, rtl_TextEncoding>({
@@ -801,8 +801,8 @@ void SvStream::DetectEncoding()
     }
 
     assert(nBomSize == 0); // we are at nOrigPos
-    char bytes[4096] = { 0 };
-    size_t nRead = ReadBytes(bytes, sizeof(bytes));
+    auto bytes = std::make_unique<char[]>(maxBytes);
+    size_t nRead = ReadBytes(bytes.get(), maxBytes);
     Seek(nOrigPos);
     ResetError();
 
@@ -815,7 +815,7 @@ void SvStream::DetectEncoding()
         return;
     comphelper::ScopeGuard ucsdet_close_guard([ucd] { ucsdet_close(ucd); });
 
-    ucsdet_setText(ucd, bytes, nRead, &uerr);
+    ucsdet_setText(ucd, bytes.get(), nRead, &uerr);
     if (!U_SUCCESS(uerr))
         return;
 

Reply via email to