With clang 19, I see a warning:

../../gllib/unictype/scripts.h:2637:26: warning: implicit conversion from 'int' to 'short' changes value from 32768 to -32768 [-Wconstant-conversion]

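The cause is that, with the Unicode 16 upgrade, the level3 array in the
generated scripts.h grew from 248 blocks to 257 blocks. The level2 entries
are offsets into level3, and the last block now starts at offset
256 << 7 = 32768, which no longer fits in a 'short'.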

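This patch fixes it by storing the level2 values as 'unsigned short',
biased by 1, so that 0 (instead of -1) marks an absent level3 block.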


2024-09-20  Bruno Haible  <br...@clisp.org>

        unictype/scripts: Fix integer overflow in generated table.
        Reported by clang through a -Wconstant-conversion warning.
        * lib/gen-uni-tables.c (output_scripts): Generate a level2 array of
        'unsigned short', not 'short', values.
        * lib/unictype/scripts.h: Regenerated.
        * lib/unictype/scripts.c (uc_script): Update accordingly.

diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c
index 003d4d7b99..617af649f1 100644
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -5356,7 +5356,7 @@ output_scripts (const char *version)
   fprintf (stream, "struct\n");
   fprintf (stream, "  {\n");
   fprintf (stream, "    int level1[%zu];\n", t.level1_size);
-  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned short level2[%zu << %d];\n", t.level2_size, t.q);
   fprintf (stream, "    unsigned char level3[%zu << %d];\n", t.level3_size, t.p);
   fprintf (stream, "  }\n");
   fprintf (stream, "u_script =\n");
@@ -5390,11 +5390,13 @@ output_scripts (const char *version)
       if (i > 0 && (i % 8) == 0)
         fprintf (stream, "\n   ");
       offset = ((uint32_t *) (t.result + level2_offset))[i];
+      /* To make the level2 values fit in 16 bits, we use 'unsigned short'
+         instead of 'short' and add 1 to each value.  */
       if (offset == 0)
-        fprintf (stream, " %5d", -1);
+        fprintf (stream, " %5d", -1 + 1);
       else
         fprintf (stream, " %5zu",
-                 (offset - level3_offset) / sizeof (uint8_t));
+                 (offset - level3_offset) / sizeof (uint8_t) + 1);
       if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
     }
diff --git a/lib/unictype/scripts.c b/lib/unictype/scripts.c
index a226d8da28..1d9d13aaad 100644
--- a/lib/unictype/scripts.c
+++ b/lib/unictype/scripts.c
@@ -35,11 +35,11 @@ uc_script (ucs4_t uc)
       if (lookup1 >= 0)
         {
           unsigned int index2 = (uc >> script_header_2) & script_header_3;
-          int lookup2 = u_script.level2[lookup1 + index2];
-          if (lookup2 >= 0)
+          unsigned int lookup2 = u_script.level2[lookup1 + index2];
+          if (lookup2 > 0)
             {
               unsigned int index3 = (uc & script_header_4);
-              unsigned char lookup3 = u_script.level3[lookup2 + index3];
+              unsigned char lookup3 = u_script.level3[(lookup2 - 1) + index3];
 
               if (lookup3 != 0xff)
                 return &scripts[lookup3];
diff --git a/lib/unictype/scripts.h b/lib/unictype/scripts.h
index 0052dbaa70..0dbb7594f1 100644
--- a/lib/unictype/scripts.h
+++ b/lib/unictype/scripts.h
@@ -2368,7 +2368,7 @@ static const
 struct
   {
     int level1[15];
-    short level2[5 << 9];
+    unsigned short level2[5 << 9];
     unsigned char level3[257 << 7];
   }
 u_script =
@@ -2378,326 +2378,326 @@ u_script =
        -1,    -1,    -1,    -1,    -1,    -1,  2048
   },
   {
-        0,   128,   256,   256,   256,   384,   512,   640,
-      768,   896,  1024,  1152,  1280,  1408,  1536,  1664,
...
+        1,   129,   257,   257,   257,   385,   513,   641,
+      769,   897,  1025,  1153,  1281,  1409,  1537,  1665,
...
},
   {
       0,   0,   0,   0,   0,   0,   0,   0,




Reply via email to