On Thu, Mar 22, 2007 at 09:21:37PM +0100, Bruno Haible wrote: > fnmatch() has a worst-case complexity O(m*n) where m is the size of the > pattern and n is the size of the sample string. Unfortunately glibc has > chosen an implementation with exponential running time. > > $ mkdir foo > $ cd foo > $ touch > aaaabbbbccccddddeeeeffffgggghhhhiiiijjjjkkkkllllmmmmnnnnooooppppqqqqrrrrssssttttuuuuvvvvwwwwxxxxyyyy > $ time find . -name 'a*b*d*e*f*g*h*i*j*k*l*m*n*o*p*q*z*' > real 4m50.007s > user 4m44.002s > sys 0m0.056s
The following patch ought to fix it. When recursing for * handling it will stop when it hits next normal *, tell the caller what the current pattern and string pointers were and return to the caller which will restart from that place. I have also removed no_leading_period2 variables, since there is no need to preserve no_leading_period variable. Either the code will always return without ever looking at that var again (that was the only case before this patch), or it will reinitialize it from what the recursive call passed up. 2007-03-27 Jakub Jelinek <[EMAIL PROTECTED]> * posix/fnmatch.c (STRUCT): Define. (fnmatch): Pass NULL as last argument to internal_fn{,w}match. * posix/fnmatch_loop.c (struct STRUCT): New type. (FCT): Add ends argument. If ends != NULL and normal * is seen in the pattern, store current pattern and string pointers and return. Adjust recursive calls. (EXT): Adjust FCT callers. (STRUCT): Undef at the end of the file. * posix/Makefile (tests): Add tst-fnmatch2. * posix/tst-fnmatch2.c: New test. --- libc/posix/fnmatch.c.jj 2005-03-30 06:17:47.000000000 +0200 +++ libc/posix/fnmatch.c 2007-03-27 10:31:25.000000000 +0200 @@ -1,4 +1,4 @@ -/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003 +/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2007 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -209,6 +209,7 @@ __wcschrnul (s, c) # define FCT internal_fnmatch # define EXT ext_match # define END end_pattern +# define STRUCT fnmatch_struct # define L(CS) CS # ifdef _LIBC # define BTOWC(C) __btowc (C) @@ -235,7 +236,8 @@ __wcschrnul (s, c) # define INT wint_t # define FCT internal_fnwmatch # define EXT ext_wmatch -# define END end_wpattern +# define END end_wpattern +# define STRUCT fnwmatch_struct # define L(CS) L##CS # define BTOWC(C) (C) # define STRLEN(S) __wcslen (S) @@ -397,12 +399,12 @@ fnmatch (pattern, string, flags) } return internal_fnwmatch (wpattern, wstring, wstring + n, - flags & FNM_PERIOD, flags); + flags & FNM_PERIOD, flags, NULL); } # endif /* mbstate_t and mbsrtowcs or _LIBC. */ return internal_fnmatch (pattern, string, string + strlen (string), - flags & FNM_PERIOD, flags); + flags & FNM_PERIOD, flags, NULL); } # ifdef _LIBC --- libc/posix/fnmatch_loop.c.jj 2005-10-15 00:44:42.000000000 +0200 +++ libc/posix/fnmatch_loop.c 2007-03-27 11:47:12.000000000 +0200 @@ -1,5 +1,5 @@ -/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005 - Free Software Foundation, Inc. +/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005, + 2007 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,10 +17,18 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ +struct STRUCT +{ + const CHAR *pattern; + const CHAR *string; + int no_leading_period; +}; + /* Match STRING against the filename pattern PATTERN, returning zero if it matches, nonzero if not. */ static int FCT (const CHAR *pattern, const CHAR *string, - const CHAR *string_end, int no_leading_period, int flags) + const CHAR *string_end, int no_leading_period, int flags, + struct STRUCT *ends) internal_function; static int EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, int no_leading_period, int flags) @@ -29,12 +37,13 @@ static const CHAR *END (const CHAR *patt static int internal_function -FCT (pattern, string, string_end, no_leading_period, flags) +FCT (pattern, string, string_end, no_leading_period, flags, ends) const CHAR *pattern; const CHAR *string; const CHAR *string_end; int no_leading_period; int flags; + struct STRUCT *ends; { register const CHAR *p = pattern, *n = string; register UCHAR c; @@ -97,6 +106,13 @@ FCT (pattern, string, string_end, no_lea if (res != -1) return res; } + else if (ends != NULL) + { + ends->pattern = p - 1; + ends->string = n; + ends->no_leading_period = no_leading_period; + return 0; + } if (n != string_end && *n == L('.') && no_leading_period) return FNM_NOMATCH; @@ -157,7 +173,9 @@ FCT (pattern, string, string_end, no_lea else { const CHAR *endp; + struct STRUCT end; + end.pattern = NULL; endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'), string_end - n); if (endp == NULL) @@ -170,36 +188,46 @@ FCT (pattern, string, string_end, no_lea { int flags2 = ((flags & FNM_FILE_NAME) ? flags : (flags & ~FNM_PERIOD)); - int no_leading_period2 = no_leading_period; - for (--p; n < endp; ++n, no_leading_period2 = 0) - if (FCT (p, n, string_end, no_leading_period2, flags2) - == 0) - return 0; + for (--p; n < endp; ++n, no_leading_period = 0) + if (FCT (p, n, string_end, no_leading_period, flags2, + &end) == 0) + goto found; } else if (c == L('/') && (flags & FNM_FILE_NAME)) { while (n < string_end && *n != L('/')) ++n; if (n < string_end && *n == L('/') - && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags) - == 0)) + && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags, + NULL) == 0)) return 0; } else { int flags2 = ((flags & FNM_FILE_NAME) ? flags : (flags & ~FNM_PERIOD)); - int no_leading_period2 = no_leading_period; if (c == L('\\') && !(flags & FNM_NOESCAPE)) c = *p; c = FOLD (c); - for (--p; n < endp; ++n, no_leading_period2 = 0) + for (--p; n < endp; ++n, no_leading_period = 0) if (FOLD ((UCHAR) *n) == c - && (FCT (p, n, string_end, no_leading_period2, flags2) - == 0)) - return 0; + && (FCT (p, n, string_end, no_leading_period, flags2, + &end) == 0)) + { + found: + if (end.pattern == NULL) + return 0; + break; + } + if (end.pattern != NULL) + { + p = end.pattern; + n = end.string; + no_leading_period = end.no_leading_period; + continue; + } } } @@ -1098,7 +1126,7 @@ EXT (INT opt, const CHAR *pattern, const switch (opt) { case L('*'): - if (FCT (p, string, string_end, no_leading_period, flags) == 0) + if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0) return 0; /* FALLTHROUGH */ @@ -1109,7 +1137,8 @@ EXT (INT opt, const CHAR *pattern, const /* First match the prefix with the current pattern with the current pattern. */ if (FCT (list->str, string, rs, no_leading_period, - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0 + flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, + NULL) == 0 /* This was successful. Now match the rest with the rest of the pattern. */ && (FCT (p, rs, string_end, @@ -1117,7 +1146,7 @@ EXT (INT opt, const CHAR *pattern, const ? no_leading_period : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, flags & FNM_FILE_NAME - ? flags : flags & ~FNM_PERIOD) == 0 + ? flags : flags & ~FNM_PERIOD, NULL) == 0 /* This didn't work. Try the whole pattern. */ || (rs != string && FCT (pattern - 1, rs, string_end, @@ -1126,7 +1155,7 @@ EXT (INT opt, const CHAR *pattern, const : (rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0), flags & FNM_FILE_NAME - ? flags : flags & ~FNM_PERIOD) == 0))) + ? flags : flags & ~FNM_PERIOD, NULL) == 0))) /* It worked. Signal success. */ return 0; } @@ -1136,7 +1165,7 @@ EXT (INT opt, const CHAR *pattern, const return FNM_NOMATCH; case L('?'): - if (FCT (p, string, string_end, no_leading_period, flags) == 0) + if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0) return 0; /* FALLTHROUGH */ @@ -1148,7 +1177,8 @@ EXT (INT opt, const CHAR *pattern, const pattern list. */ if (FCT (STRCAT (list->str, p), string, string_end, no_leading_period, - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) + flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, + NULL) == 0) /* It worked. Signal success. */ return 0; while ((list = list->next) != NULL); @@ -1163,7 +1193,8 @@ EXT (INT opt, const CHAR *pattern, const for (runp = list; runp != NULL; runp = runp->next) if (FCT (runp->str, string, rs, no_leading_period, - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) + flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, + NULL) == 0) break; /* If none of the patterns matched see whether the rest does. */ @@ -1172,8 +1203,8 @@ EXT (INT opt, const CHAR *pattern, const rs == string ? no_leading_period : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, - flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) - == 0)) + flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, + NULL) == 0)) /* This is successful. */ return 0; } @@ -1198,6 +1229,7 @@ EXT (INT opt, const CHAR *pattern, const #undef FCT #undef EXT #undef END +#undef STRUCT #undef MEMPCPY #undef MEMCHR #undef STRCOLL --- libc/posix/Makefile.jj 2007-02-26 18:13:45.000000000 +0100 +++ libc/posix/Makefile 2007-03-27 11:07:05.000000000 +0200 @@ -90,7 +90,7 @@ tests := tstgetopt testfnm runtests run tst-execv1 tst-execv2 tst-execl1 tst-execl2 \ tst-execve1 tst-execve2 tst-execle1 tst-execle2 \ tst-execvp3 tst-execvp4 tst-rfc3484 tst-rfc3484-2 \ - tst-getaddrinfo3 + tst-getaddrinfo3 tst-fnmatch2 xtests := bug-ga2 ifeq (yes,$(build-shared)) test-srcs := globtest --- libc/posix/tst-fnmatch2.c.jj 2007-03-27 11:06:37.000000000 +0200 +++ libc/posix/tst-fnmatch2.c 2007-03-27 11:55:15.000000000 +0200 @@ -0,0 +1,35 @@ +#include <fnmatch.h> +#include <stdio.h> + +int +do_test (void) +{ + char pattern[] = "a*b*c*d*e*f*g*h*i*j*k*l*m*n*o*p*q*r*s*t*u*v*w*x*y*z*"; + const char *string = "aaaabbbbccccddddeeeeffffgggghhhhiiiijjjjkkkkllllmmmm" + "nnnnooooppppqqqqrrrrssssttttuuuuvvvvwwwwxxxxyyyy"; + if (fnmatch (pattern, string, 0) != FNM_NOMATCH) + { + puts ("First fnmatch didn't return FNM_NOMATCH"); + return 1; + } + pattern[(sizeof pattern) - 3] = '*'; + if (fnmatch (pattern, string, 0) != 0) + { + puts ("Second fnmatch didn't return 0"); + return 1; + } + if (fnmatch ("a*b/*", "abbb/.x", FNM_PATHNAME | FNM_PERIOD) != FNM_NOMATCH) + { + puts ("Third fnmatch didn't return FNM_NOMATCH"); + return 1; + } + if (fnmatch ("a*b/*", "abbb/xy", FNM_PATHNAME | FNM_PERIOD) != 0) + { + puts ("Fourth fnmatch didn't return 0"); + return 1; + } + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" Jakub