Hi Bruno,

Bruno Haible <br...@clisp.org> writes:

> In Unicode 16.0.0, there are composed characters whose constituents are
> Unicode characters with value >= 0x12000. A hardcoded bound in
> lib/uninorm/composition.c no longer works.

Thanks, I was going to look at the new 'DoNotEmit.txt' file added in
Unicode 16.0.0 to see if I could write anything useful with it.

I noticed a small difference in my output from gen-uni-tables.c; see below:

diff --git a/lib/unilbrk/lbrktables.c b/lib/unilbrk/lbrktables.c
index 30b0761927..e66f742946 100644
--- a/lib/unilbrk/lbrktables.c
+++ b/lib/unilbrk/lbrktables.c
@@ -76,7 +76,7 @@ const unsigned char unilbrk_table[41][41] =
 /*  VF */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  P,  I,  I,  D,  D,  I,  D,  
P,  P,  D,  D,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  
D,  D,  P,  D,  D, },
 /*  RI */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  P,  I,  I,  D,  D,  I,  D,  
P,  P,  D,  D,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  
D,  D,  P,  D,  D, },
 /* ZWJ */ {  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  
D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  I,  I,  D,  D,  D,  D,  D,  D,  D,  D,  
D,  D,  D,  I,  I, },
-/*  EB */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  P,  I,  I,  D,  D,  I,  D,  
P,  P,  D,  I,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  
D,  D,  P,  D,  I, },
+/*  EB */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  P,  I,  I,  D,  D,  I,  D,  
P,  P,  D,  I,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  
D,  D,  D,  D,  I, },
 /*  EM */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  P,  I,  I,  D,  D,  I,  D,  
P,  P,  D,  I,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  D,  
D,  D,  D,  D,  D, },
 /* "" */
 /* before */

Notice that in the EB row a P changed to a D in the third column from the
right (column 39 of 41; the third entry on the last wrapped line above).

My Unicode data files are from the archive here:
https://www.unicode.org/Public/zipped/16.0.0/UCD.zip

Collin

Reply via email to