The unilbrk implementation does not apply the rules sequentially; for speed, it uses a table and as few 'if' conditions as possible in the main code flow. As a result, in some cases the implementation of a lower-priority rule has side effects on a higher-priority rule.
This patch fixes this, in those cases that became apparent from LineBreakTest.txt. 2024-09-15 Bruno Haible <br...@clisp.org> unilbrk: Fix conflicts between rules. * lib/unilbrk/u8-possible-linebreaks.c (u8_possible_linebreaks_loop): Avoid side effect of (LB18) on (LB14) and (LB15a). Avoid side effect of (LB19a) on (LB18). * lib/unilbrk/u16-possible-linebreaks.c (u16_possible_linebreaks_loop): Likewise. * lib/unilbrk/u32-possible-linebreaks.c (u32_possible_linebreaks_loop): Likewise. diff --git a/lib/unilbrk/u16-possible-linebreaks.c b/lib/unilbrk/u16-possible-linebreaks.c index bf0f31057f..9678041606 100644 --- a/lib/unilbrk/u16-possible-linebreaks.c +++ b/lib/unilbrk/u16-possible-linebreaks.c @@ -190,7 +190,12 @@ u16_possible_linebreaks_loop (const uint16_t *s, size_t n, const char *encoding, last_prop = LBP_AL1; seen_space = NULL; } - else if (last_prop == LBP_ZW || seen_space != NULL) + else if (last_prop == LBP_ZW + || (seen_space != NULL + /* (LB14) has higher priority than (LB18). */ + && !(last_prop == LBP_OP1 || last_prop == LBP_OP2) + /* (LB15a) has higher priority than (LB18). */ + && !(last_prop == LBP_QU2))) { /* (LB8) Break after zero-width space. */ /* (LB18) Break after spaces. @@ -236,7 +241,9 @@ u16_possible_linebreaks_loop (const uint16_t *s, size_t n, const char *encoding, *p = UC_BREAK_POSSIBLE; } else if (((prop == LBP_QU1 || prop == LBP_QU2 || prop == LBP_QU3) - && (! prev_ea || ! EA (lookahead1_prop_ea))) + && (! prev_ea || ! EA (lookahead1_prop_ea)) + /* (LB18) has higher priority than (LB19a). */ + && prev_prop != LBP_SP) || ((prev_prop == LBP_QU1 || prev_prop == LBP_QU2 || prev_prop == LBP_QU3) && (! prev2_ea || ! 
ea))) { diff --git a/lib/unilbrk/u32-possible-linebreaks.c b/lib/unilbrk/u32-possible-linebreaks.c index a7630467a6..c12ca3abbb 100644 --- a/lib/unilbrk/u32-possible-linebreaks.c +++ b/lib/unilbrk/u32-possible-linebreaks.c @@ -183,7 +183,12 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding, last_prop = LBP_AL1; seen_space = NULL; } - else if (last_prop == LBP_ZW || seen_space != NULL) + else if (last_prop == LBP_ZW + || (seen_space != NULL + /* (LB14) has higher priority than (LB18). */ + && !(last_prop == LBP_OP1 || last_prop == LBP_OP2) + /* (LB15a) has higher priority than (LB18). */ + && !(last_prop == LBP_QU2))) { /* (LB8) Break after zero-width space. */ /* (LB18) Break after spaces. @@ -229,7 +234,9 @@ u32_possible_linebreaks_loop (const uint32_t *s, size_t n, const char *encoding, *p = UC_BREAK_POSSIBLE; } else if (((prop == LBP_QU1 || prop == LBP_QU2 || prop == LBP_QU3) - && (! prev_ea || ! EA (lookahead1_prop_ea))) + && (! prev_ea || ! EA (lookahead1_prop_ea)) + /* (LB18) has higher priority than (LB19a). */ + && prev_prop != LBP_SP) || ((prev_prop == LBP_QU1 || prev_prop == LBP_QU2 || prev_prop == LBP_QU3) && (! prev2_ea || ! ea))) { diff --git a/lib/unilbrk/u8-possible-linebreaks.c b/lib/unilbrk/u8-possible-linebreaks.c index 7abf20beed..3f32ee6289 100644 --- a/lib/unilbrk/u8-possible-linebreaks.c +++ b/lib/unilbrk/u8-possible-linebreaks.c @@ -190,7 +190,12 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding, last_prop = LBP_AL1; seen_space = NULL; } - else if (last_prop == LBP_ZW || seen_space != NULL) + else if (last_prop == LBP_ZW + || (seen_space != NULL + /* (LB14) has higher priority than (LB18). */ + && !(last_prop == LBP_OP1 || last_prop == LBP_OP2) + /* (LB15a) has higher priority than (LB18). */ + && !(last_prop == LBP_QU2))) { /* (LB8) Break after zero-width space. */ /* (LB18) Break after spaces. 
@@ -236,7 +241,9 @@ u8_possible_linebreaks_loop (const uint8_t *s, size_t n, const char *encoding, *p = UC_BREAK_POSSIBLE; } else if (((prop == LBP_QU1 || prop == LBP_QU2 || prop == LBP_QU3) - && (! prev_ea || ! EA (lookahead1_prop_ea))) + && (! prev_ea || ! EA (lookahead1_prop_ea)) + /* (LB18) has higher priority than (LB19a). */ + && prev_prop != LBP_SP) || ((prev_prop == LBP_QU1 || prev_prop == LBP_QU2 || prev_prop == LBP_QU3) && (! prev2_ea || ! ea))) {