Florian Weimer wrote:
> gen-uni-tables.c produces types like this:
> 
> struct
>   {
>     int header[1];
>     int level1[2];
>     short level2[2 << 7];
>     /*unsigned*/ int level3[16 << 4];
>   }
> 
> Why is the unsigned commented out?

I think, at the time the code was written, some K&R C compilers were still in
use that did accept an initialization

  struct { int x; } table = { 0xFFFFFFFF };

but did not accept an initialization

  struct { unsigned int x; } table = { 0xFFFFFFFFU };

So, I made the element type an 'int'
  - in gen-uni-tables.c line 744,
  - e.g. in unictype/categ_Cc.h line 14,
  - in unictype/bitmap.h line 40.
But as you can see, the value is being cast back from 'int' to 'unsigned int'
immediately after being fetched from the table. So all is fine.

> Some of the constants are so large
> that they are treated as unsigned ints.

Indeed ISO C 99 and newer (§ 6.4.4.1.(5)) specify that 0xFFFFFFFF is a priori
an unsigned integer. But when used as an initializer for an 'int' it poses
no problem. All machines nowadays use two's complement.

gcc 10.2.0 and clang 9.0.0 give no warning on the attached code (combined
code from unictype/categ_Cc.c, unictype/categ_Cc.h, unictype/bitmap.h),
even with "-Wall -std=gnu18".

> This kind of narrowing initialization is no longer valid C++.

Indeed, GCC 10.2.0 and clang give errors about this code when used as C++ code:

$ gcc -Wall -O -S -x c++ foo.c
foo.c:156:1: error: narrowing conversion of '4294967295' from 'unsigned int' to 
'int' [-Wnarrowing]
  156 | };
      | ^
foo.c:156:1: error: narrowing conversion of '2147483648' from 'unsigned int' to 
'int' [-Wnarrowing]
foo.c:156:1: error: narrowing conversion of '4294967295' from 'unsigned int' to 
'int' [-Wnarrowing]

But why would this matter? This is C code, not C++ code.

Bruno

#include <stdbool.h>
/* Type of the character argument 'uc' taken by the lookup functions below.  */
typedef unsigned int ucs4_t;

/* Parameters that bitmap_lookup uses to split 'uc' into table indices:
     header_0  right shift that yields the level1 index,
     header_2  right shift applied before masking for the level2 index,
     header_3  mask for the level2 index (127, i.e. 128 level2 slots),
     header_4  mask for the level3 word index (15, i.e. 16 words).
   The low 5 bits of 'uc' select the bit inside a level3 word.  */
#define header_0 16
#define header_2 9
#define header_3 127
#define header_4 15
static const
struct
  {
    int header[1];
    int level1[1];
    short level2[1 << 7];
    /*unsigned*/ int level3[1 << 4];
  }
u_categ_Cc =
{
  { 1 },
  {     2 * sizeof (int) / sizeof (short) +     0 },
  {
        2 +   128 * sizeof (short) / sizeof (int) +     0,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1,
       -1
  },
  {
    0xFFFFFFFF, 0x00000000, 0x00000000, 0x80000000,
    0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000
  }
};

/* Look up the membership bit of UC in the three-level bitmap TABLE.

   TABLE is read both as an array of 'int' and as an array of 'short':
   element 0 (as int) is the number of level1 entries, each level1 entry is
   an offset in 'short' units to a level2 block, and each level2 entry is an
   offset in 'int' units to a level3 block of 32-bit words.  A negative
   level1 or level2 entry means that no bit is set in that range.

   Return 1 if the bit for UC is set, 0 otherwise.  */
static inline int
bitmap_lookup (const void *table, ucs4_t uc)
{
  const int *as_ints = (const int *) table;
  const short *as_shorts = (const short *) table;

  /* Level 1: reject code points beyond the range covered by the table.  */
  unsigned int slot1 = uc >> header_0;
  if (slot1 >= as_ints[0])
    return 0;

  int level2_offset = as_ints[1 + slot1];
  if (level2_offset < 0)
    return 0;

  /* Level 2: locate the level3 block, if any.  */
  unsigned int slot2 = (uc >> header_2) & header_3;
  int level3_offset = as_shorts[level2_offset + slot2];
  if (level3_offset < 0)
    return 0;

  /* Level 3: fetch the word as unsigned, then pick the bit given by the
     low 5 bits of UC.  */
  unsigned int slot3 = (uc >> 5) & header_4;
  unsigned int bits = as_ints[level3_offset + slot3];

  return (bits >> (uc & 0x1f)) & 1;
}

/* Return true if UC has its bit set in the u_categ_Cc bitmap,
   false otherwise.  */
bool
uc_is_general_category_Cc (ucs4_t uc)
{
  int bit = bitmap_lookup (&u_categ_Cc, uc);

  return bit != 0;
}

Reply via email to