The root cause of this bug is the use of mbtowc in 64-egf-speedup.patch and 67-w.patch. These patches use mbtowc to examine the characters immediately before and after a match in order to check whether the match is a whole word. But when a binary file is being grepped, mbtowc is passed arbitrary bytes rather than a valid UTF-8 character. As a consequence, its hidden internal conversion state is left corrupted, and the following matches produce nonsense results. The fix is to use mbrtowc instead: it takes an explicit mbstate_t argument, so the state can be reset whenever a conversion fails. A patch is attached.
65-dfa-optional.patch is a red herring. I guess that patch just exposes the bug because it causes grep to use a different code path. But you get the same bug with grep -F, which is not touched by that patch. -- Peter De Wachter
--- a/build-tree/grep-2.5.3/src/search.c
+++ b/build-tree/grep-2.5.3/src/search.c
@@ -502,7 +502,7 @@
     }
   else
     s = last_char;
-  mr = mbtowc (&pwc, s, match - s);
+  mr = mbrtowc (&pwc, s, match - s, &mbs);
   if (mr <= 0)
     {
       memset (&mbs, '\0', sizeof (mbstate_t));
@@ -531,8 +531,8 @@
   wchar_t nwc;
   int mr;
 
-  mr = mbtowc (&nwc, buf + start + len,
-               end - buf - start - len - 1);
+  mr = mbrtowc (&nwc, buf + start + len,
+                end - buf - start - len - 1, &mbs);
   if (mr <= 0)
     {
       memset (&mbs, '\0', sizeof (mbstate_t));
@@ -941,7 +941,7 @@
     }
   else
     s = last_char;
-  mr = mbtowc (&pwc, s, beg - s);
+  mr = mbrtowc (&pwc, s, beg - s, &mbs);
   if (mr <= 0)
     memset (&mbs, '\0', sizeof (mbstate_t));
   else if ((iswalnum (pwc) || pwc == L'_')
@@ -959,7 +959,7 @@
   wchar_t nwc;
   int mr;
 
-  mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
+  mr = mbrtowc (&nwc, beg + len, buf + size - beg - len, &mbs);
   if (mr <= 0)
     {
       memset (&mbs, '\0', sizeof (mbstate_t));