I'm not sure what the right behavior is here, and there may be no obvious answer, but I _think_ the following is not desirable:
If an unquoted backslash is followed by a quoted globbing character, quote_string_for_globbing will store the unquoted backslash and then also another one to quote the character -- resulting in the originally quoted character becoming unquoted:

    $ bash -cx '[[ \\FOO == $1"*" ]]' _ '\'; echo $?
    + [[ \FOO == \\* ]]
    0

This patch would need some more work, but I wanted to see what the correct behavior should be first.

Also, I don't really understand how this function can work correctly without the QGLOB_CTLESC flag -- it seems there wouldn't be a way to distinguish literal CTLESC's from ones serving the quoting function.
---
 pathexp.c | 98 +++++++++++++------------------------------------------
 1 file changed, 22 insertions(+), 76 deletions(-)

diff --git a/pathexp.c b/pathexp.c index 94df66ac..742f7ab2 100644 --- a/pathexp.c +++ b/pathexp.c @@ -203,9 +203,8 @@ quote_string_for_globbing (const char *pathname, int qflags) { char *temp; register int i, j; - int cclass, collsym, equiv, c, last_was_backslash; + int cclass, collsym, equiv, c; int savei, savej; - unsigned char cc; temp = (char *)xmalloc (2 * strlen (pathname) + 1); @@ -215,55 +214,32 @@ quote_string_for_globbing (const char *pathname, int qflags) return temp; } - cclass = collsym = equiv = last_was_backslash = 0; + cclass = collsym = equiv = 0; for (i = j = 0; pathname[i]; i++) { - /* Fix for CTLESC at the end of the string? */ - if (pathname[i] == CTLESC && pathname[i+1] == '\0') - { - temp[j++] = pathname[i++]; - break; - } - /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an - ERE special character, so we should just be able to pass it through.
*/ - else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL)) - { - i++; - temp[j++] = pathname[i]; - continue; - } - else if (pathname[i] == CTLESC) + if (pathname[i] == '\\' && pathname[i+1] == CTLESC) + { + if (pathname[i+2] == CTLESC || pathname[i+2] == CTLNUL) + /* If the unquoted backslash was quoting a literal CTLESC or CTLNUL + then just store the CTLESC or CTLNUL since neither is special + in a glob pattern or regex. Happens with e.g. ${slash}$'\1' */ + i += 2; + else + /* Otherwise, quote the backslash so that it does not alter the quoted + character (which will be processed on the following iteration). + Happens with e.g. ${slash}"*" */ + temp[j++] = '\\'; + } + else if ((pathname[i] == '\\' || pathname[i] == CTLESC) && pathname[i+1]) { -convert_to_backslash: - cc = pathname[i+1]; - - if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/') - continue; - - /* What to do if preceding char is backslash? */ - + i++; /* We don't have to backslash-quote non-special ERE characters if - we're quoting a regexp. */ - if (cc != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (cc) == 0) - continue; - - /* We don't have to backslash-quote non-special BRE characters if - we're quoting a glob pattern. */ - if (cc != CTLESC && (qflags & QGLOB_REGEXP) == 0 && glob_char_p (pathname+i+1) == 0) - continue; - - /* If we're in a multibyte locale, don't bother quoting multibyte - characters. It matters if we're going to convert NFD to NFC on - macOS, and doesn't make a difference on other systems. */ - if (cc != CTLESC && locale_utf8locale && UTF8_SINGLEBYTE (cc) == 0) - continue; /* probably don't need to check for UTF-8 locale */ - - temp[j++] = '\\'; - i++; - if (pathname[i] == '\0') - break; + we're quoting a regexp, nor non-special glob characters if not. */ + if (((qflags & QGLOB_REGEXP) ? 
ere_char (pathname[i]) + : glob_char_p (pathname+i))) + temp[j++] = '\\'; } - else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/ + else if ((qflags & QGLOB_REGEXP) && pathname[i] == '[') /*]*/ { temp[j++] = pathname[i++]; /* open bracket */ savej = j; @@ -351,36 +327,6 @@ convert_to_backslash: i--; /* increment will happen above in loop */ continue; /* skip double assignment below */ } - else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0) - { - /* XXX - if not quoting regexp, use backslash as quote char. Should - We just pass it through without treating it as special? That is - what ksh93 seems to do. */ - - /* If we want to pass through backslash unaltered, comment out these - lines. */ - temp[j++] = '\\'; - - i++; - if (pathname[i] == '\0') - break; - /* If we are turning CTLESC CTLESC into CTLESC, we need to do that - even when the first CTLESC is preceded by a backslash. */ - if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL)) - i++; /* skip over the CTLESC */ - else if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC) - /* A little more general: if there is an unquoted backslash in the - pattern and we are handling quoted characters in the pattern, - convert the CTLESC to backslash and add the next character on - the theory that the backslash will quote the next character - but it would be inconsistent not to replace the CTLESC with - another backslash here. We can't tell at this point whether the - CTLESC comes from a backslash or other form of quoting in the - original pattern. */ - goto convert_to_backslash; - } - else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP)) - last_was_backslash = 1; temp[j++] = pathname[i]; } endpat: -- 2.42.0