This is an automated email from the ASF dual-hosted git repository.

leginee pushed a commit to branch Fix-sal-encoding-dispatch
in repository https://gitbox.apache.org/repos/asf/openoffice.git

commit 8121fd3c154d3417cac43a2a2219db58b2a2b0f0
Author: Peter Kovacs <[email protected]>
AuthorDate: Sat Jun 20 06:19:35 2026 +0200

    Fix sal encoding-dispatch off-by-one (ISCII removal) + ZapfDingbats table bugs
    
    Impl_getTextEncodingData() is a positional array indexed by the ABI-frozen
    rtl_TextEncoding enum. #i119141 removed the ISCII converter but deleted its
    array slot with no NULL placeholder, shifting JAVA_UTF8, ADOBE_STANDARD,
    ADOBE_SYMBOL, PT154 and ADOBE_DINGBATS onto the wrong converter. Restore the
    NULL slot and add a compile-time size guard.
    
    Wiring ADOBE_DINGBATS correctly surfaced three latent ZapfDingbats table
    errors (0x6C→U+25CF; 0xD5..0xD7→U+2192, U+2194, U+2195 rather than running
    on to U+2797; circled-digits reverse start 0xAC); fix all three. Update
    rtl_textcvt qa: park the now-unimplemented ISCII checks behind
    TEST_ISCII_DEVANAGARI, correct the ISCII Windows codepage to 57002.
---
 main/sal/qa/rtl/textenc/rtl_textcvt.cxx | 20 +++++++++++++++++---
 main/sal/textenc/convertadobe.tab       |  6 +++---
 main/sal/textenc/textenc.cxx            | 14 ++++++++++++++
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/main/sal/qa/rtl/textenc/rtl_textcvt.cxx 
b/main/sal/qa/rtl/textenc/rtl_textcvt.cxx
index dd5f68e999..ec1148f08a 100644
--- a/main/sal/qa/rtl/textenc/rtl_textcvt.cxx
+++ b/main/sal/qa/rtl/textenc/rtl_textcvt.cxx
@@ -36,6 +36,13 @@
 #include "sal/types.h"
 #include "gtest/gtest.h"
 
+/* The ISCII Devanagari converter was removed for licensing reasons (its table
+   was derived from LGPL code; see #i119141), so RTL_TEXTENCODING_ISCII_DEVANAGARI
+   is currently unimplemented and has no converter.  The test data that exercises
+   it is kept below but compiled out.  Define this macro to re-enable those
+   checks once an Apache-licensed ISCII converter is reintroduced. */
+// #define TEST_ISCII_DEVANAGARI
+
 namespace {
 
 struct SingleByteCharSet {
@@ -1062,6 +1069,7 @@ TEST_F(Test, testSingleByte) {
                 0x0425,0x0418,0x0419,0x041A,0x041B,0x041C,0x041D,0x041E,
                 0x041F,0x042F,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
                 0x042C,0x042B,0x0417,0x0428,0x042D,0x0429,0x0427,0x042A } },
+#ifdef TEST_ISCII_DEVANAGARI
             { RTL_TEXTENCODING_ISCII_DEVANAGARI,
               { 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
                 0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
@@ -1095,6 +1103,7 @@ TEST_F(Test, testSingleByte) {
                 0x094D,0x093C,0x0964,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
                 0xFFFF,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
                 0x096D,0x096E,0x096F,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF } },
+#endif
             { RTL_TEXTENCODING_ADOBE_STANDARD,
               { 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
                 0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0xFFFF,
@@ -1178,7 +1187,7 @@ TEST_F(Test, testSingleByte) {
                 0x2738,0x2739,0x273A,0x273B,0x273C,0x273D,0x273E,0x273F,
 // 60
                 0x2740,0x2741,0x2742,0x2743,0x2744,0x2745,0x2746,0x2747,
-                0x2748,0x2749,0x274A,0x274B,0x27CF,0x274D,0x25A0,0x274F,
+                0x2748,0x2749,0x274A,0x274B,0x25CF,0x274D,0x25A0,0x274F,
                 0x2750,0x2751,0x2752,0x25B2,0x25BC,0x25C6,0x2756,0x25D7,
                 0x2758,0x2759,0x275A,0x275B,0x275C,0x275D,0x275E,0xFFFF,
 // 80
@@ -1194,7 +1203,7 @@ TEST_F(Test, testSingleByte) {
 // C0
                 0x2780,0x2781,0x2782,0x2783,0x2784,0x2785,0x2786,0x2787,
                 0x2788,0x2789,0x278A,0x278B,0x278C,0x278D,0x278E,0x278F,
-                0x2790,0x2791,0x2792,0x2793,0x2794,0x2795,0x2796,0x2797,
+                0x2790,0x2791,0x2792,0x2793,0x2794,0x2192,0x2194,0x2195,
                 0x2798,0x2799,0x279A,0x279B,0x279C,0x279D,0x279E,0x279F,
 // E0
                 0x27A0,0x27A1,0x27A2,0x27A3,0x27A4,0x27A5,0x27A6,0x27A7,
@@ -2775,7 +2784,10 @@ TEST_F(Test, testWindows) {
         { 0, RTL_TEXTENCODING_DONTKNOW, true },
         { 0, RTL_TEXTENCODING_UCS4, true },
         { 0, RTL_TEXTENCODING_UCS2, true },
-        { 0, RTL_TEXTENCODING_ISCII_DEVANAGARI, true }
+        // 57002 is the genuine Windows code page for ISCII Devanagari; the
+        // codepage<->encoding mapping is a fixed table independent of whether
+        // a converter is implemented (the converter was removed, see #i119141).
+        { 57002, RTL_TEXTENCODING_ISCII_DEVANAGARI, true }
     };
     for (std::size_t i = 0; i < sizeof data / sizeof data[0]; ++i) {
         OSL_ASSERT(data[i].codePage != 0 || data[i].reverse);
@@ -2832,8 +2844,10 @@ TEST_F(Test, testInfo) {
         { RTL_TEXTENCODING_IBM_861, RTL_TEXTENCODING_INFO_MIME, true },
         { RTL_TEXTENCODING_IBM_863, RTL_TEXTENCODING_INFO_MIME, true },
         { RTL_TEXTENCODING_IBM_865, RTL_TEXTENCODING_INFO_MIME, true },
+#ifdef TEST_ISCII_DEVANAGARI
         { RTL_TEXTENCODING_ISCII_DEVANAGARI, RTL_TEXTENCODING_INFO_ASCII, true 
},
         { RTL_TEXTENCODING_ISCII_DEVANAGARI, RTL_TEXTENCODING_INFO_MIME, false 
},
+#endif
         { RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_ASCII, false 
},
         { RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_MIME, true },
         { RTL_TEXTENCODING_ADOBE_STANDARD, RTL_TEXTENCODING_INFO_SYMBOL, false 
},
diff --git a/main/sal/textenc/convertadobe.tab 
b/main/sal/textenc/convertadobe.tab
index 2a59313889..1191211da2 100644
--- a/main/sal/textenc/convertadobe.tab
+++ b/main/sal/textenc/convertadobe.tab
@@ -233,7 +233,7 @@ adobeSymbolEncodingConverterData = {
 rtl::textenc::BmpUnicodeToSingleByteRange const 
unicodeToAdobeDingbatsEncoding[]
 = { { 0x0020, 0x0020 - 0x0020, 0x20 }, { 0x00A0, 0x00A0 - 0x00A0, 0x20 },
     { 0x2192, 0x2192 - 0x2192, 0xD5 }, { 0x2194, 0x2195 - 0x2194, 0xD6 },
-    { 0x2460, 0x2469 - 0x2460, 0xAB }, 
+    { 0x2460, 0x2469 - 0x2460, 0xAC },
     { 0x25A0, 0x25A0 - 0x25A0, 0x6E },
     { 0x25B2, 0x25B2 - 0x25B2, 0x73 }, { 0x25BC, 0x25BC - 0x25BC, 0x74 },
     { 0x25C6, 0x25C6 - 0x25C6, 0x75 }, { 0x25CF, 0x25CF - 0x25CF, 0x6C },
@@ -266,7 +266,7 @@ adobeDingbatsEncodingConverterData = {
       0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, // 0x50
       0x2738, 0x2739, 0x273A, 0x273B, 0x273C, 0x273D, 0x273E, 0x273F,
       0x2740, 0x2741, 0x2742, 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, // 0x60
-      0x2748, 0x2749, 0x274A, 0x274B, 0x27CF, 0x274D, 0x25A0, 0x274F,
+      0x2748, 0x2749, 0x274A, 0x274B, 0x25CF, 0x274D, 0x25A0, 0x274F,
       0x2750, 0x2751, 0x2752, 0x25B2, 0x25BC, 0x25C6, 0x2756, 0x25D7, // 0x70
       0x2758, 0x2759, 0x275A, 0x275B, 0x275C, 0x275D, 0x275E, 0xFFFF,
       0xF8D7, 0xF8D8, 0xF8D9, 0xF8DA, 0xF8DB, 0xF8DC, 0xF8DD, 0xF8DE, // 0x80
@@ -279,7 +279,7 @@ adobeDingbatsEncodingConverterData = {
       0x2778, 0x2779, 0x277A, 0x277B, 0x277C, 0x277D, 0x277E, 0x277F,
       0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786, 0x2787, // 0xC0
       0x2788, 0x2789, 0x278A, 0x278B, 0x278C, 0x278D, 0x278E, 0x278F,
-      0x2790, 0x2791, 0x2792, 0x2793, 0x2794, 0x2795, 0x2796, 0x2797, // 0xD0
+      0x2790, 0x2791, 0x2792, 0x2793, 0x2794, 0x2192, 0x2194, 0x2195, // 0xD0
       0x2798, 0x2799, 0x279A, 0x279B, 0x279C, 0x279D, 0x279E, 0x279F,
       0x27A0, 0x27A1, 0x27A2, 0x27A3, 0x27A4, 0x27A5, 0x27A6, 0x27A7, // 0xE0
       0x27A8, 0x27A9, 0x27AA, 0x27AB, 0x27AC, 0x27AD, 0x27AE, 0x27AF,
diff --git a/main/sal/textenc/textenc.cxx b/main/sal/textenc/textenc.cxx
index 746d97ce07..ddb207be39 100644
--- a/main/sal/textenc/textenc.cxx
+++ b/main/sal/textenc/textenc.cxx
@@ -210,11 +210,25 @@ Impl_getTextEncodingData(rtl_TextEncoding nEncoding) 
SAL_THROW_EXTERN_C()
             &aImplBig5HkscsTextEncodingData, /* BIG5_HKSCS */
             &aImplTis620TextEncodingData, /* TIS_620 */
             &aImplKoi8UTextEncodingData, /* KOI8_U */
+            NULL, /* ISCII_DEVANAGARI: converter removed (#i119141), but the
+                     slot MUST remain.  This array is indexed positionally by
+                     the (ABI-frozen) rtl_TextEncoding enum value, so a missing
+                     slot shifts every following encoding onto the wrong
+                     converter. */
             &aImplJavaUtf8TextEncodingData, /* JAVA_UTF8 */
             &adobeStandardEncodingData, /* ADOBE_STANDARD */
             &adobeSymbolEncodingData, /* ADOBE_SYMBOL */
             &aImplPT154TextEncodingData, /* PT154 */
             &adobeDingbatsEncodingData }; /* ADOBE_DINGBATS */
+    /* aData[] is indexed positionally by the (ABI-frozen) rtl_TextEncoding
+       enum, so its length must equal the highest enum value plus one.  Assert
+       this at compile time: dropping a slot (as happened to ISCII_DEVANAGARI,
+       #i119141) would otherwise silently shift every following encoding onto
+       the wrong converter, undetectable at run time because the bounds check
+       below just maps the now-out-of-range top encoding to NULL. */
+    typedef int Impl_textEncodingDataSizeCheck[
+        sizeof aData / sizeof aData[0]
+            == RTL_TEXTENCODING_ADOBE_DINGBATS + 1 ? 1 : -1];
     return
         nEncoding < sizeof aData / sizeof aData[0] ? aData[nEncoding] : NULL;
 }

Reply via email to