In my opinion, some of the line-breaking results are not very good. I'm tailoring the algorithm to allow line breaks between a period and a following letter or symbol.
2008-05-12  Bruno Haible  <[EMAIL PROTECTED]>

        Tailor the linebreaking algorithm.
        * lib/unilbrk/tables.c (unilbrk_table): Change (IS,AL) entry.

--- lib/unilbrk/tables.c.orig 2008-05-12 19:15:59.000000000 +0200
+++ lib/unilbrk/tables.c 2008-05-12 16:00:02.000000000 +0200
@@ -39,7 +39,7 @@
 /* NS */ { P, I, D, I, D, I, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, },
 /* OP */ { P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, },
 /* QU */ { P, I, I, I, I, I, P, P, I, I, P, I, P, I, I, I, P, I, I, I, I, I, I, I, },
-/* IS */ { P, I, D, I, D, I, P, P, D, I, D, I, P, I, D, D, P, I, D, D, D, D, D, D, },
+/* IS */ { P, I, D, I, D, I, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, },
 /* NU */ { P, I, D, I, D, I, P, P, I, I, D, I, P, I, I, I, P, I, D, D, D, D, D, D, },
 /* PO */ { P, I, D, I, D, I, P, P, D, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, },
 /* PR */ { P, I, D, I, D, I, P, P, D, I, I, I, P, I, D, D, P, I, I, I, I, I, I, I, },
@@ -54,6 +54,10 @@
 /* "" */
 /* before */
 };
+/* Note: The (IS,AL) entry has been changed from I to D. In other words, the
+   rule "Do not break between numeric punctuation and alphabetics" is not
+   implemented here. We want to break before the HTML tag in strings like
+   "<P>Some sentence.</P>" */
 /* Note: The (B2,B2) entry should probably be D instead of P. */
 /* Note: The (PR,ID) entry should probably be D instead of I. */
 /* Note: The (WJ,*) and (GL,*) entries should probably be P instead of I. */