On Sun, 8 Jan 2017 12:49:42 -0800 Paul Eggert <egg...@cs.ucla.edu> wrote:
> Assaf Gordon wrote: > > The immediate cause is somewhere in gnulib's DFA module. > > The bug was introduced in Gnulib, in commit > 403adf1b40897ba108075008c10bd38d937e1539 > dated 2016-11-25 and labeled "dfa: addition of new state on demand". > It's not a bug that grep runs into, since grep doesn't use the > newline transition that sed does. I installed the attached patch to > fix the Gnulib bug. I'll leave Bug#25390 open, as I assume you'll > want to check it for 'sed' and add a test case for 'sed'. Thanks for fixing quickly. I wrote two additional patches for dfa. First, derive number of allocation from not argument but number of state in transition table allocation. Second, melt down dfastate() into build_state(). Now, I think that there do not have to be separated. I also wrote a simple test, but the issue are not always caused, as it depends on state of memory. Should we rely to complate the test on valgrind?
From 2a4a8b2b75d08c05803ea0580de152058852ac04 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka <nori...@kcn.ne.jp> Date: Mon, 9 Jan 2017 07:46:13 +0900 Subject: [PATCH 1/2] dfa: simplify transition table allocation * src/dfa.c (realloc_trans_if_necessary): Remove second argument. Its value is derived from other variable. Update callers. (dfastate): Remove calculation of max number of state. --- lib/dfa.c | 44 ++++++++++++++++++++------------------------ 1 files changed, 20 insertions(+), 24 deletions(-) diff --git a/lib/dfa.c b/lib/dfa.c index 141888a..bda4602 100644 --- a/lib/dfa.c +++ b/lib/dfa.c @@ -2576,14 +2576,14 @@ dfaanalyze (struct dfa *d, bool searchflag) /* Make sure D's state arrays are large enough to hold NEW_STATE. */ static void -realloc_trans_if_necessary (struct dfa *d, state_num new_state) +realloc_trans_if_necessary (struct dfa *d) { state_num oldalloc = d->tralloc; - if (oldalloc <= new_state) + if (oldalloc < d->sindex) { state_num **realtrans = d->trans ? d->trans - 2 : NULL; ptrdiff_t newalloc1 = realtrans ? d->tralloc + 2 : 0; - realtrans = xpalloc (realtrans, &newalloc1, new_state - oldalloc + 1, + realtrans = xpalloc (realtrans, &newalloc1, d->sindex - oldalloc, -1, sizeof *realtrans); realtrans[0] = realtrans[1] = NULL; d->trans = realtrans + 2; @@ -2825,6 +2825,9 @@ dfastate (state_num s, struct dfa *d, unsigned char uc, state_num trans[]) state_letter = state_index (d, &follows, CTX_LETTER); else state_letter = state; + + /* Reallocate now, to reallocate any newline transition properly. */ + realloc_trans_if_necessary (d); } /* If we are a searching matcher, the default transition is to a state @@ -2847,22 +2850,18 @@ dfastate (state_num s, struct dfa *d, unsigned char uc, state_num trans[]) state_num maxstate = -1; for (size_t i = 0; i < NOTCHAR; i++) if (tstbit (i, &label)) - { - switch (d->syntax.sbit[i]) - { - case CTX_NEWLINE: - trans[i] = state_newline; - break; - case CTX_LETTER: - trans[i] = state_letter; - break; - default: - trans[i] = state; - break; - } - if (maxstate < trans[i]) - maxstate = trans[i]; - } + switch (d->syntax.sbit[i]) + { + case CTX_NEWLINE: + trans[i] = state_newline; + break; + case CTX_LETTER: + trans[i] = state_letter; + break; + default: + trans[i] = state; + break; + } #ifdef DEBUG fprintf (stderr, "trans table %td", s); @@ -2879,9 +2878,6 @@ dfastate (state_num s, struct dfa *d, unsigned char uc, state_num trans[]) free (follows.elems); free (tmp.elems); - /* Reallocate now, to reallocate any newline transition properly. */ - realloc_trans_if_necessary (d, maxstate); - /* Keep the newline transition in a special place so we can use it as a sentinel. */ if (tstbit (d->syntax.eolbyte, &label)) @@ -3042,7 +3038,7 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp, int separate_contexts = state_separate_contexts (&d->mb_follows); state_num s2 = state_index (d, &d->mb_follows, separate_contexts ^ CTX_ANY); - realloc_trans_if_necessary (d, s2); + realloc_trans_if_necessary (d); d->mb_trans[s][d->states[s1].mb_trindex] = s2; @@ -3137,7 +3133,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl, } if (!d->tralloc) - realloc_trans_if_necessary (d, 0); + realloc_trans_if_necessary (d); /* Current state. */ state_num s = 0, s1 = 0; -- 1.7.1
From ca47ba16b959a45fff75b3b31e1a1dc4f0c4adb5 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka <nori...@kcn.ne.jp> Date: Mon, 9 Jan 2017 08:21:21 +0900 Subject: [PATCH 2/2] dfa: melt down dfastate into build_state * src/dfa.c (dfastate): Remove it. (build_state): Insert content of dfastate() to bottom. --- lib/dfa.c | 97 ++++++++++++++++++++++++++++-------------------------------- 1 files changed, 45 insertions(+), 52 deletions(-) diff --git a/lib/dfa.c b/lib/dfa.c index bda4602..6896ed3 100644 --- a/lib/dfa.c +++ b/lib/dfa.c @@ -2609,8 +2609,10 @@ realloc_trans_if_necessary (struct dfa *d) } } -/* Return the transition out of state s of d for the input character uc, - updating the slots in trans accordingly. +/* + Calculate the transition table for a new state derived from state s + for a compiled dfa d after input character uc, and return the new + state number. Do not worry about all possible input characters; calculate just the group of positions that match uc. Label it with the set of characters that @@ -2639,8 +2641,9 @@ realloc_trans_if_necessary (struct dfa *d) If after comparing with every group there are characters remaining in C, create a new group labeled with the characters of C and insert this position in that group. */ + static state_num -dfastate (state_num s, struct dfa *d, unsigned char uc, state_num trans[]) +build_state (state_num s, struct dfa *d, unsigned char uc) { position_set follows; /* Union of the follows of the group. */ position_set tmp; /* Temporary space for merging sets. */ @@ -2652,6 +2655,45 @@ dfastate (state_num s, struct dfa *d, unsigned char uc, state_num trans[]) fprintf (stderr, "build state %td\n", s); #endif + /* A pointer to the new transition table, and the table itself. */ + state_num **ptrans = (accepting (s, d) ? d->fails : d->trans) + s; + state_num *trans = *ptrans; + + if (!trans) + { + /* MAX_TRCOUNT is an arbitrary upper limit on the number of + transition tables that can exist at once, other than for + initial states. Often-used transition tables are quickly + rebuilt, whereas rarely-used ones are cleared away. */ + if (MAX_TRCOUNT <= d->trcount) + { + for (state_num i = d->min_trcount; i < d->tralloc; i++) + { + free (d->trans[i]); + free (d->fails[i]); + d->trans[i] = d->fails[i] = NULL; + } + d->trcount = 0; + } + + d->trcount++; + *ptrans = trans = xmalloc (NOTCHAR * sizeof *trans); + + /* Fill transition table with a default value which means that the + transited state has not been calculated yet. */ + for (int i = 0; i < NOTCHAR; i++) + trans[i] = -2; + } + + /* Set up the success bits for this state. */ + d->success[s] = 0; + if (accepts_in_context (d->states[s].context, CTX_NEWLINE, s, d)) + d->success[s] |= CTX_NEWLINE; + if (accepts_in_context (d->states[s].context, CTX_LETTER, s, d)) + d->success[s] |= CTX_LETTER; + if (accepts_in_context (d->states[s].context, CTX_NONE, s, d)) + d->success[s] |= CTX_NONE; + /* Positions that match the input char. */ leaf_set group; group.elems = xnmalloc (d->nleaves, sizeof *group.elems); @@ -2889,55 +2931,6 @@ dfastate (state_num s, struct dfa *d, unsigned char uc, state_num trans[]) return trans[uc]; } -/* Calculate the transition table for a new state derived from state s - for a compiled dfa d after input character uc, and return the new - state number. */ - -static state_num -build_state (state_num s, struct dfa *d, unsigned char uc) -{ - /* A pointer to the new transition table, and the table itself. */ - state_num **ptrans = (accepting (s, d) ? d->fails : d->trans) + s; - state_num *trans = *ptrans; - - if (!trans) - { - /* MAX_TRCOUNT is an arbitrary upper limit on the number of - transition tables that can exist at once, other than for - initial states. Often-used transition tables are quickly - rebuilt, whereas rarely-used ones are cleared away. */ - if (MAX_TRCOUNT <= d->trcount) - { - for (state_num i = d->min_trcount; i < d->tralloc; i++) - { - free (d->trans[i]); - free (d->fails[i]); - d->trans[i] = d->fails[i] = NULL; - } - d->trcount = 0; - } - - d->trcount++; - *ptrans = trans = xmalloc (NOTCHAR * sizeof *trans); - - /* Fill transition table with a default value which means that the - transited state has not been calculated yet. */ - for (int i = 0; i < NOTCHAR; i++) - trans[i] = -2; - } - - /* Set up the success bits for this state. */ - d->success[s] = 0; - if (accepts_in_context (d->states[s].context, CTX_NEWLINE, s, d)) - d->success[s] |= CTX_NEWLINE; - if (accepts_in_context (d->states[s].context, CTX_LETTER, s, d)) - d->success[s] |= CTX_LETTER; - if (accepts_in_context (d->states[s].context, CTX_NONE, s, d)) - d->success[s] |= CTX_NONE; - - return dfastate (s, d, uc, trans); -} - /* Multibyte character handling sub-routines for dfaexec. */ /* Consume a single byte and transit state from 's' to '*next_state'. -- 1.7.1
From 0c398a69f6bf797388ee7f59bd850da4ddd0b5fe Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka <nori...@kcn.ne.jp> Date: Mon, 9 Jan 2017 08:54:28 +0900 Subject: [PATCH] tests: new test for dfa crash bug Maybe dfa crashes if multi-lines are read in pattern space. It is fixed at commit 823b5cb589366f7c8742503af980803afad0978f in gnulib. Reported by S. Gilles in https://bugs.gnu.org/25390 * testsuite/newline.good, testsuite/newline.sed, testsuite/newline.sed: New test. * testsuite/Makefile.tests: Add the test. * testsuite/local.mk: Add the test. --- testsuite/Makefile.tests | 3 ++- testsuite/local.mk | 6 +++++- testsuite/newline.good | 1 + testsuite/newline.inp | 2 ++ testsuite/newline.sed | 2 ++ 5 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 testsuite/newline.good create mode 100644 testsuite/newline.inp create mode 100644 testsuite/newline.sed diff --git a/testsuite/Makefile.tests b/testsuite/Makefile.tests index 00a4ec8..53bd83d 100644 --- a/testsuite/Makefile.tests +++ b/testsuite/Makefile.tests @@ -22,7 +22,8 @@ SKIP = :>$@.skip; exit 77 enable sep inclib 8bit 8to7 newjis xabcx dollar noeol bkslashes \ numsub head madding mac-mf empty xbxcx xbxcx3 recall recall2 xemacs \ appquit fasts uniq manis linecnt khadafy allsub flipcase space modulo \ -y-bracket y-newline y-zero insert brackets amp-escape newline-anchor:: +y-bracket y-newline y-zero insert brackets amp-escape newline-anchor \ +newline:: $(SEDENV) $(SED) -f $(srcdir)/$@.sed \ < $(srcdir)/$@.inp | $(elide_cr) > $@.out $(CMP) $(srcdir)/$@.good $@.out diff --git a/testsuite/local.mk b/testsuite/local.mk index fae6225..4ba0298 100644 --- a/testsuite/local.mk +++ b/testsuite/local.mk @@ -109,7 +109,8 @@ SEDTESTS += testsuite/appquit testsuite/enable testsuite/sep \ testsuite/amp-escape testsuite/help testsuite/file \ testsuite/quiet testsuite/factor testsuite/binary3 \ testsuite/binary2 testsuite/binary testsuite/dc \ - testsuite/newline-anchor testsuite/zero-anchor + testsuite/newline-anchor testsuite/zero-anchor \ + testsuite/newline # Note that the first lines are statements. They ensure that environment # variables that can perturb tests are unset or set to expected values. @@ -275,6 +276,9 @@ EXTRA_DIST += \ testsuite/newjis.good \ testsuite/newjis.inp \ testsuite/newjis.sed \ + testsuite/newline.good \ + testsuite/newline.inp \ + testsuite/newline.sed \ testsuite/newline-anchor.good \ testsuite/newline-anchor.inp \ testsuite/newline-anchor.sed \ diff --git a/testsuite/newline.good b/testsuite/newline.good new file mode 100644 index 0000000..587be6b --- /dev/null +++ b/testsuite/newline.good @@ -0,0 +1 @@ +x diff --git a/testsuite/newline.inp b/testsuite/newline.inp new file mode 100644 index 0000000..b211856 --- /dev/null +++ b/testsuite/newline.inp @@ -0,0 +1,2 @@ +123456789abcd +x diff --git a/testsuite/newline.sed b/testsuite/newline.sed new file mode 100644 index 0000000..e85852d --- /dev/null +++ b/testsuite/newline.sed @@ -0,0 +1,2 @@ +N +s/123456789abcd\n// -- 1.7.1