Re: enhancement merge request
On Sun, Apr 18, 2021, 7:13 PM Ananth Chellappa wrote: > Far as I understand, there is no > way to accomplish what I want - concisely : *get a true private mode* (no > logging to HISTFILE *OR* recall with history command after exiting > private-mode (toggle of history using set -/+ o) *without sacrificing > productivity*. > I think you can do something similar to the patch directly in the shell by temporarily unsetting HISTFILE and then deleting history entries in a certain range and restoring HISTFILE when you are ready, like: hist_pause() { [[ -z ${pause_histcmd-} ]] || return pause_histcmd=$HISTCMD old_histfile=$HISTFILE history -a # in case we exit w/o hist_resume HISTFILE= } hist_resume() { [[ -n ${pause_histcmd-} ]] || return local i for ((i=HISTCMD; i>pause_histcmd; i--)); do history -d $i done unset pause_histcmd HISTFILE=$old_histfile } Or, alternatively, unset HISTFILE, write out the current history to a temporary file (`history -w $file') and then later clear the history (`history -c'), read in the temporary file (`history -r $file'), and restore HISTFILE. The latter approach might be slightly more robust but, note that the history list will be re-numbered to start at 1 after restoring. >
segfault on history-search-*
history-search-* commands segfault on the devel branch since the size_t changes --- lib/readline/search.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/readline/search.c b/lib/readline/search.c index b7be876f..525c9c69 100644 --- a/lib/readline/search.c +++ b/lib/readline/search.c @@ -621,7 +621,7 @@ rl_history_search_reinit (int flags) if (rl_point) { /* Allocate enough space for anchored and non-anchored searches */ - if (_rl_history_search_len >= history_string_size - 2) + if (_rl_history_search_len + 2 >= history_string_size) { history_string_size = _rl_history_search_len + 2; history_search_string = (char *)xrealloc (history_search_string, history_string_size);
undo list free crash
After the changes in https://git.savannah.gnu.org/cgit/bash.git/commit/?h=devel&id=9e3495c9, I sometimes get segfaults when performing a bunch of history navigations followed by ^C. Small reproducer from some input fuzzing and debugger output below. cat >/tmp/hist <<'EOF' X XX EOF cat >/tmp/brc <<'EOF' bind -x '"\eI": kill -INT 0' EOF cat >/tmp/irc <<'EOF' "\e[A": history-search-backward set history-preserve-point on set revert-all-at-newline on EOF HISTFILE=/tmp/hist INPUTRC=/tmp/irc "$BASH" --rcfile /tmp/brc -i < <( printf 'XX' printf '\e[D\e[D\e[C\e[B\e[D\e[A\e[D\e[A\e[B\e[C\e[D\e[A\e[D\e[A' printf '\eI' ) * thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x3567a068f1b8) frame #0: 0x0001045ce170 bash`rl_do_undo at undo.c:186:25 183 184 /* To better support vi-mode, a start or end value of -1 means 185 rl_point, and a value of -2 means rl_end. */ -> 186 if (rl_undo_list->what == UNDO_DELETE || rl_undo_list->what == UNDO_INSERT) 187 { 188 start = TRANS (rl_undo_list->start); 189 end = TRANS (rl_undo_list->end); Target 0: (bash) stopped. (lldb) bt * thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x3567a068f1b8) * frame #0: 0x0001045ce170 bash`rl_do_undo at undo.c:186:25 frame #1: 0x0001045d82f0 bash`_rl_revert_previous_lines at misc.c:468:6 frame #2: 0x0001045d83a4 bash`_rl_revert_all_lines at misc.c:498:3 frame #3: 0x0001045a47bc bash`readline_internal_teardown(eof=1) at readline.c:507:5 frame #4: 0x0001045a4468 bash`readline_internal at readline.c:740:11 frame #5: 0x0001045a4320 bash`readline(prompt="bash-5.2$ ") at readline.c:387:11 frame #6: 0x0001044bf6b0 bash`yy_readline_get at parse.y:1564:31 frame #7: 0x0001044c58f0 bash`yy_getc at parse.y:1501:10 frame #8: 0x0001044c6290 bash`shell_getc(remove_quoted_newline=1) at parse.y:2396:8 frame #9: 0x0001044c4b48 bash`read_token(command=0) at parse.y:3436:23
global-buffer-overflow in parse.y
$ ./bash -c 'case x in x) if ((1)); then :; fi ;; esac' parse.y:974:82: runtime error: index -1 out of bounds for type 'int[257]' SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior parse.y:974:82 in = ==52960==ERROR: AddressSanitizer: global-buffer-overflow READ of size 4 at 0x000100cf26dc thread T0 #0 0x1004b63c8 in yyparse parse.y:974 $ ./bash -c 'case x in x) if ((1)); then :; fi esac' parse.y:979:82: runtime error: index -1 out of bounds for type 'int[257]' SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior parse.y:979:82 in = ==52850==ERROR: AddressSanitizer: global-buffer-overflow READ of size 4 at 0x000100b0e6dc thread T0 #0 0x1002d2808 in yyparse parse.y:979 both of these are like: |CASE WORD newline_list IN case_clause ESAC { $$ = make_case_command ($2, $5, word_lineno[word_top]); if (word_top >= 0) word_top--; } and word_top == -1
asan report in bash_add_history
./bash --norc -in <<<$'\\\n.' bashhist.c:899:8: runtime error: addition of unsigned offset to 0x00010700d190 overflowed to 0x00010700d18f ERROR: AddressSanitizer: heap-buffer-overflow on address 0x00010700d18f at pc 0x0001045fe1b8 bp 0x00016bb1f350 sp 0x00016bb1f348 READ of size 1 at 0x00010700d18f thread T0 frame #5: 0x0001045fe1b8 bash`bash_add_history(line=".") at bashhist.c:899:8 frame #6: 0x0001045fd0c8 bash`maybe_add_history(line=".") at bashhist.c:759:2 frame #7: 0x0001045fca34 bash`pre_process_line(line=".", print_changes=1, addit=1) at bashhist.c:628:5 frame #8: 0x00010432df50 bash`shell_getc(remove_quoted_newline=1) at parse.y:2508:17 frame #9: 0x00010432786c bash`read_token(command=0) at parse.y:3432:23 (lldb) fr s 5 frame #5: 0x0001045fe1b8 bash`bash_add_history(line=".") at bashhist.c:899:8 896 curlen = strlen (current->line); 897 898 if (dstack.delimiter_depth == 0 && current->line[curlen - 1] == '\\' && -> 899 current->line[curlen - 2] != '\\') 900 { 901 current->line[curlen - 1] = '\0'; 902 curlen--; (lldb) fr v current->line curlen (char *) current->line = 0x00010700d190 "\\" (size_t) curlen = 1
asan report in spname
happens when attempting spell-correct-word on an empty line ./bash --norc -in <<<$'\030s' ERROR: AddressSanitizer: heap-buffer-overflow on address 0x000102e0d0d1 at pc 0x0001004ccf64 bp 0x00016fdf0e30 sp 0x00016fdf0e28 READ of size 1 at 0x000102e0d0d1 thread T0 frame #5: 0x0001004ccf64 bash`spname(oldname="", newname="") at spell.c:78:8 frame #6: 0x0001004cde0c bash`dirspell(dirname="") at spell.c:195:11 frame #7: 0x000100335a10 bash`bash_spell_correct_shellword(count=1, key=115) at bashline.c:1348:16 frame #8: 0x0001004fe830 bash`_rl_dispatch_subseq(key=115, map=0x0001007e0360, got_subseq=0) at readline.c:922:8 frame #9: 0x0001005009dc bash`_rl_dispatch_subseq(key=24, map=0x0001007ddb20, got_subseq=0) at readline.c:1068:8 frame #10: 0x0001004fc434 bash`_rl_dispatch(key=24, map=0x0001007ddb20) at readline.c:866:10 (lldb) fr s 5 frame #5: 0x0001004ccf64 bash`spname(oldname="", newname="") at spell.c:78:8 75if (*op == '\0')/* Exact or corrected */ 76 { 77/* `.' is rarely the right thing. */ -> 78if (oldname[1] == '\0' && newname[1] == '\0' && 79 oldname[0] != '.' && newname[0] == '.') 80 return -1; 81return strcmp(oldname, newname) != 0; (lldb) fr v oldname (char *) oldname = 0x000102e0d0d0 ""
asan report+fix in sh_mkdoublequoted
mkdir -p /tmp/bin >'/tmp/bin/$' chmod +x '/tmp/bin/$' PATH=/tmp/bin ./bash --norc -in <<<$'\e*' ERROR: AddressSanitizer: heap-buffer-overflow on address 0x0001039a9913 at pc 0x0001004d57b4 bp 0x00016fdf1350 sp 0x00016fdf1348 WRITE of size 1 at 0x0001039a9913 thread T0 frame #5: 0x0001004d57b4 bash`sh_mkdoublequoted(s="", slen=1, flags=1) at shquote.c:211:6 frame #6: 0x0001003410e4 bash`bash_quote_filename(s="$", rtype=1, qcp="") at bashline.c:4301:15 frame #7: 0x000100554b30 bash`make_quoted_replacement(match="$", mtype=1, qc="") at complete.c:1797:16 frame #8: 0x000100549aec bash`insert_all_matches(matches=0x000106600200, point=0, qc="") at complete.c:1945:9 frame #9: 0x00010053c63c bash`rl_complete_internal(what_to_do=42) at complete.c:2144:7 frame #10: 0x00010053d450 bash`rl_insert_completions(ignore=1, invoking_key=42) at complete.c:466:11 frame #5: 0x0001004d57b4 bash`sh_mkdoublequoted(s="", slen=1, flags=1) at shquote.c:211:6 208 *r++ = *s++; 209 } 210 *r++ = '"'; -> 211 *r = '\0'; 212 213 return ret; 214 } (lldb) fr v ret rlen (char *) ret = 0x0001039a9910 "\"$\"" (size_t) rlen = 3 diff --git a/lib/sh/shquote.c b/lib/sh/shquote.c index a27b9202..98b3d927 100644 --- a/lib/sh/shquote.c +++ b/lib/sh/shquote.c @@ -188,7 +188,7 @@ sh_mkdoublequoted (const char *s, size_t slen, int flags) send = s + slen; mb_cur_max = flags ? MB_CUR_MAX : 1; - rlen = (flags == 0) ? slen + 3 : (2 * slen) + 1; + rlen = (flags == 0) ? slen + 3 : (2 * slen) + 3; ret = r = (char *)xmalloc (rlen); *r++ = '"';
Re: command_not_found_handle not run when PATH is empty
On Wed, Mar 8, 2023 at 4:28 PM Moshe Looks wrote: > This is very old code and I have no idea what the original intention > was in returning 'name' here or if doing so is still performing useful > work, but the tests pass at least. A null $PATH component is treated as the current directory, and bash treats an empty $PATH as having only a single null component, so we can't just fail to return a command when $PATH is empty. The current code's behavior of returning the path as provided when $PATH is empty accomplishes the goal of "finding" the path in $PATH but ignores any flags that would normally limit the results to files that exist/are executable/etc. This leads to weirdness not just with command_not_found_handle but with command name completion and `type -a' and `command output as well. $ cd /bin $ PATH= compgen -c -X '!bash' $ PATH= type -a bash -bash: type: bash: not found I think it might make sense to change code that looks at the value of PATH to explicitly treat an empty value as `.' so that all such behavior is consistent. diff --git a/bashline.c b/bashline.c index 3238b13a..e0d6107c 100644 --- a/bashline.c +++ b/bashline.c @@ -2063,6 +2063,8 @@ command_word_completion_function (const char *hint_text, int state) dequoted_hint = bash_dequote_filename (hint, 0); path = get_string_value ("PATH"); + if (path == 0 || *path == '\0') + path = "."; path_index = dot_in_path = 0; /* Initialize the variables for each type of command word. */ diff --git a/findcmd.c b/findcmd.c index 186871ac..b413c870 100644 --- a/findcmd.c +++ b/findcmd.c @@ -256,13 +256,13 @@ _find_user_command_internal (const char *name, int flags) /* Search for the value of PATH in both the temporary environments and in the regular list of variables. */ - if (var = find_variable_tempenv ("PATH")) /* XXX could be array? */ -path_list = value_cell (var); + if (var = find_variable_tempenv ("PATH")) +path_list = get_variable_value (var); else -path_list = (char *)NULL; +path_list = 0; if (path_list == 0 || *path_list == '\0') -return (savestring (name)); +path_list = "."; cmd = find_user_command_in_path (name, path_list, flags, (int *)0); @@ -363,11 +363,14 @@ search_for_command (const char *pathname, int flags) { if (flags & CMDSRCH_STDPATH) path_list = conf_standard_path (); - else if (temp_path || path) - path_list = value_cell (path); + else if (path) + path_list = get_variable_value (path); else path_list = 0; + if (path_list == 0 || *path_list == '\0') + path_list = "."; + command = find_user_command_in_path (pathname, path_list, FS_EXEC_PREFERRED|FS_NODIRS, &st); if (command && hashing_enabled && temp_path == 0 && (flags & CMDSRCH_HASH)) @@ -440,7 +443,9 @@ user_command_matches (const char *name, int flags, int state) if (stat (".", &dotinfo) < 0) dotinfo.st_dev = dotinfo.st_ino = 0; /* so same_file won't match */ path_list = get_string_value ("PATH"); -path_index = 0; + if (path_list == 0 || *path_list == '\0') + path_list = "."; + path_index = 0; } while (path_list && path_list[path_index])
use-after-free in read_token_word
./bash --norc -O noexpand_translation -in <<<'$":"' =ERROR: AddressSanitizer: heap-use-after-free on address 0x000108102b40 READ of size 1 thread T0 #0 read_token_word parse.y:5236 #1 read_token parse.y:3618 freed by thread T0 here: #1 read_token_word parse.y:5231 #2 read_token parse.y:3618 diff --git a/parse.y b/parse.y index e3516e2d..0a8c039a 100644 --- a/parse.y +++ b/parse.y @@ -5228,15 +5228,19 @@ read_token_word (int character) /* PST_NOEXPAND */ /* Try to locale-expand the converted string. */ ttrans = locale_expand (ttok, 0, ttoklen - 1, first_line, &ttranslen); - free (ttok); - /* Add the double quotes back (or single quotes if the user has set that option). */ if (singlequote_translations && ((ttoklen - 1) != ttranslen || STREQN (ttok, ttrans, ttranslen) == 0)) - ttok = sh_single_quote (ttrans); + { + free (ttok); + ttok = sh_single_quote (ttrans); + } else - ttok = sh_mkdoublequoted (ttrans, ttranslen, 0); + { + free (ttok); + ttok = sh_mkdoublequoted (ttrans, ttranslen, 0); + } free (ttrans); ttrans = ttok;
Re: use-after-free in read_token_word
Also in parse_matched_pair: diff --git a/parse.y b/parse.y index 0a8c039a..1001ac1b 100644 --- a/parse.y +++ b/parse.y @@ -3906,14 +3906,13 @@ parse_matched_pair (int qc, int open, int close, size_t *lenp, int flags) /* Locale expand $"..." here. */ /* PST_NOEXPAND */ ttrans = locale_expand (nestret, 0, nestlen - 1, start_lineno, &ttranslen); - free (nestret); - /* If we're supposed to single-quote translated strings, check whether the translated result is different from the original and single-quote the string if it is. */ if (singlequote_translations && ((nestlen - 1) != ttranslen || STREQN (nestret, ttrans, ttranslen) == 0)) { + free (nestret); if ((rflags & P_DQUOTE) == 0) nestret = sh_single_quote (ttrans); else if ((rflags & P_DQUOTE) && (dolbrace_state == DOLBRACE_QUOTE2) && (flags & P_DOLBRACE)) @@ -3923,7 +3922,10 @@ parse_matched_pair (int qc, int open, int close, size_t *lenp, int flags) nestret = sh_backslash_quote_for_double_quotes (ttrans, 0); } else - nestret = sh_mkdoublequoted (ttrans, ttranslen, 0); + { + free (nestret); + nestret = sh_mkdoublequoted (ttrans, ttranslen, 0); + } free (ttrans); nestlen = strlen (nestret); retind -= 2; /* back up before the $" */
double-free in bashline.c
A few functions in bashline.c free static variables but do not assign to them until after calling bash_tilde_expand, which may throw_to_top_level. If SIGINT is received at an inopportune time, these variables may be free-d again. diff --git a/bashline.c b/bashline.c index 2745c4dd..b5c0a49f 100644 --- a/bashline.c +++ b/bashline.c @@ -1970,6 +1970,7 @@ command_word_completion_function (const char *hint_text, int state) free (dequoted_hint); if (hint) free (hint); + dequoted_hint = hint = (char *)NULL; mapping_over = searching_path = 0; hint_is_dir = CMD_IS_DIR (hint_text); @@ -2252,6 +2253,7 @@ globword: free (fnhint); if (filename_hint) free (filename_hint); + fnhint = filename_hint = (char *)NULL; filename_hint = sh_makepath (current_path, hint, 0); /* Need a quoted version (though it doesn't matter much in most @@ -2397,7 +2399,10 @@ command_subst_completion_function (const char *text, int state) start_len = text - orig_start; filename_text = savestring (text); if (matches) - free (matches); + { + free (matches); + matches = (char **)NULL; + } /* * At this point we can entertain the idea of re-parsing @@ -3873,9 +3878,11 @@ glob_complete_word (const char *text, int state) { rl_filename_completion_desired = 1; FREE (matches); + matches = (char **)NULL; if (globorig != globtext) FREE (globorig); FREE (globtext); + globorig = globtext = (char *)NULL; ttext = bash_tilde_expand (text, 0);
rl_filename_quoting_function restoration
bash_glob_complete_word modifies rl_filename_quoting_function, which can fail to be restored if bash_tilde_expand handles a SIGINT while glob-complete-word is running Handling the restore in bashline_reset() seems to solve the issue: diff --git a/bashline.c b/bashline.c index 2745c4dd..7c3812eb 100644 --- a/bashline.c +++ b/bashline.c @@ -685,6 +685,7 @@ bashline_reset (void) complete_fullquote = 1; rl_filename_quote_characters = default_filename_quote_characters; set_filename_bstab (rl_filename_quote_characters); + rl_filename_quoting_function = bash_quote_filename; set_directory_hook (); rl_filename_stat_hook = bash_filename_stat_hook;
Re: global-buffer-overflow in parse.y
On Mon, Mar 6, 2023 at 9:16 AM Chet Ramey wrote: > Thanks for the report. It's the specific combination of `if' and the `((' > command that causes the problem. Looks like same thing also happens when `if' is followed by a newline ./bash -c $'case $LINENO in 0) if\n:; then echo FAIL; fi esac' bash/parse.y:980:82: runtime error: index -1 out of bounds for type 'int[257]' #0 0x100759bcc in yyparse parse.y:980
asan report in extmatch
The relevant code was added in https://git.savannah.gnu.org/cgit/bash.git/commit/?id=da43077 with similar additions to both gmatch and extmatch, but I suspect the test on line 912 was not meant to be in extmatch: > .a bash -O extglob -O dotglob -c ': ./!(.foo)' ERROR: AddressSanitizer: heap-buffer-overflow on address 0x000102e02daf READ of size 1 at 0x000102e02daf thread T0 #0 extmatch sm_loop.c:912 frame #5: bash`extmatch(xc=33, s=".a", se="", p="(.foo)", pe="", flags=161) at sm_loop.c:912:36 909 910 if (m1 == 0 && (flags & FNM_DOTDOT) && 911 (SDOT_OR_DOTDOT (s) || -> 912((flags & FNM_PATHNAME) && s[-1] == L('/') && PDOT_OR_DOTDOT(s 913 return (FNM_NOMATCH); 914 915 /* if srest > s, we are not at start of string */
EXIT trap definition order
It seems that if a trap handler for a terminating signal resends its own signal (after resetting the signal disposition), any configured EXIT trap will be executed (as I think is expected), but only if an EXIT trap had already been set prior to the the first instance of a trap having been set for the terminating signal in question. That is, this script will print "EXIT": trap 'echo EXIT' EXIT trap 'trap TERM; kill 0' TERM kill 0 Whereas this one will not: trap 'trap TERM; kill 0' TERM trap 'echo EXIT' EXIT kill 0
compgen respecting dotglob changes
compgen's glob matching does not respect dotglob being turned on or off unless some other globbing takes place between the setting being changed and compgen being called: $ cd "$(mktemp -d)" $ > .x $ bash -c 'shopt -s dotglob; compgen -G \*' $ bash -c 'shopt -s dotglob; : *; compgen -G \*' .x
history file missing timestamp when HISTFILESIZE reached
When HISTTIMEFORMAT is set and history file truncation is performed, the first line of the history file (i.e. the timestamp of the first entry) seems to always be missing > /tmp/hist HISTTIMEFORMAT= HISTFILESIZE=3 HISTFILE=/tmp/hist bash --norc -in <<<$'1\n2\n3' $ cat /tmp/hist 1 #1679274410 2 #1679274410 3
parsing command substitution inside parameter expansion in interactive shell
If a command substitution inside a parameter expansion has a command followed by a newline, bash prints an error message (though the command is parsed and saved in the history list correctly): bash --norc -in <<<$'${_+$(:\n)}\n!!' $ ${_+$(: bash: command substitution: line 3: unexpected EOF while looking for matching `)' > )} $ !! ${_+$(:; )} If the command substitution starts with a newline, no error is printed but the command is _not_ saved correctly to the history list (a semicolon is inserted at the start): bash --norc -in <<<$'${_+$(\n:)}\n!!' $ ${_+$( > :)} $ !! ${_+$(; :)}
[PATCH] Save more readline state when running compgen
Completion state is not fully restored after invoking `compgen' within a competition function. Normally, if a compspec does not specifically include one of the options that triggers filename completion, the generated completions are not treated as filenames: $ complete -W '/tmp /var' cmd $ cmd /[TAB] /tmp /var However, if a completion function is invoked, and that function happens to call `compgen' with one of the options that triggers filename completion, the generated completions _are_ treated as filenames (i.e. get quoted, have `/' appended to directories, etc.) $ fun() { compgen -f >/dev/null; } $ complete -W '/tmp /var' -F fun cmd $ cmd /[TAB] tmp/ var/ diff --git a/builtins/complete.def b/builtins/complete.def index 0d9b9293..37eb20ba 100644 --- a/builtins/complete.def +++ b/builtins/complete.def @@ -650,7 +650,7 @@ compgen_builtin (WORD_LIST *list) STRINGLIST *sl; char *word, **matches; char *old_line; - int old_ind; + int old_ind, old_completion, old_quoting, old_suppress; if (list == 0) return (EXECUTION_SUCCESS); @@ -692,6 +692,10 @@ compgen_builtin (WORD_LIST *list) rval = EXECUTION_FAILURE; + old_completion = rl_filename_completion_desired; + old_quoting = rl_filename_quoting_desired; + old_suppress = rl_completion_suppress_append; + /* probably don't have to save these, just being safe */ old_line = pcomp_line; old_ind = pcomp_ind; @@ -720,6 +724,10 @@ compgen_builtin (WORD_LIST *list) strvec_dispose (matches); } + rl_filename_completion_desired = old_completion; + rl_filename_quoting_desired = old_quoting; + rl_completion_suppress_append = old_suppress; + if (sl) { if (sl->list && sl->list_len)
wait on procsub in EXIT trap
If an EXIT trap is executed after receipt of a terminating signal, waiting on a process substitution within the trap can fail: $ (trap 'wait $!; echo $?' EXIT; : <(:); kill 0) -bash: wait: pid 83694 is not a child of this shell 127 Interestingly, if an external command or a subshell is executed after the process substitution is started but prior to receipt of the signal, the `wait' works fine: $ (trap 'wait $!; echo $?' EXIT; : <(:); (:); kill 0) 0
Re: EXIT trap definition order
On Mon, Mar 20, 2023 at 10:57 AM Chet Ramey wrote: > > On 3/17/23 1:28 PM, Grisha Levit wrote: > > It seems that if a trap handler for a terminating signal resends its > > own signal (after resetting the signal disposition), any configured > > EXIT trap will be executed (as I think is expected), but only if an > > EXIT trap had already been set prior to the the first instance of a > > trap having been set for the terminating signal in question. > > If the shell is handling a terminating signal and receives another instance > of that signal while doing so, it takes that as an indication to exit > immediately. Thanks, that makes sense. But then the first script should _not_ print EXIT, right? i.e. if both TERM and EXIT traps are defined, it shouldn't matter which one happened to be defined first.
Re: wait on procsub in EXIT trap
On Tue, Mar 21, 2023 at 3:28 PM Chet Ramey wrote: > > > Interestingly, if an external command or a subshell is executed after > > the process substitution is started but prior to receipt of the > > signal, the `wait' works fine: > > > > $ (trap 'wait $!; echo $?' EXIT; : <(:); (:); kill 0) > > Because the procsub gets reaped before the terminating signal arrives. Oh I see, thanks. So to wait for a procsub after receiving a terminating signal, one must do so in a trap for the signal itself and not in an EXIT trap.
Re: [PATCH] Save more readline state when running compgen
On Tue, Mar 21, 2023 at 3:47 PM Chet Ramey wrote: > OK, say you did in fact run compgen in the foreground -- very unusual > because it's awkward to capture the possible completions that way -- to > generate completions. Sorry I should have explained. I hit this issue because I was using compgen in an unorthodox way, just to check if some files matching a glob exist. Basically something like compgen -G 'foo/*' >/dev/null && COMPREPLY=(bar) This can be a lot faster than expanding the glob into an array and then testing that the array is not empty.
Re: [PATCH] Save more readline state when running compgen
On Tue, Mar 21, 2023 at 4:11 PM Chet Ramey wrote: > OK, which do you think would be the more common case? Wanting the options > used to generate completions to persist or using it in this way? Usually people do `COMPREPLY=($(compgen ...))' (or write to stdout in a command specified with `complete -C') so nothing persists anyway, right? I don't know if there's really any use case that generates completions to place into COMPREPLY by running compgen in the foreground so it's hard for me to imagine a use case that benefits from the readline state being modified.
Re: [PATCH] Save more readline state when running compgen
On Tue, Mar 21, 2023 at 5:26 PM alex xmb ratchev wrote: > On Tue, Mar 21, 2023, 21:05 Grisha Levit wrote: >> >> compgen -G 'foo/*' >/dev/null && COMPREPLY=(bar) > > i dont get that code at all , but i like idea of speedier file filling .. can > u explain some in short ? Let's say you want to know if there are any entries starting with `foo' in the current directory. You can do: GLOBIGNORE= shopt -s nullglob set +o noglob tmp=(foo*) if (( ${#tmp[@]} )); then ... Or you can do if compgen -G 'foo*' >/dev/null; then ... The latter doesn't care about nullglob / noglob / GLOBIGNORE and if the number of files is large can be a lot faster. P.S. If you are interested in files that may or may not start with a dot, you do need to set dotglob in either case, which was the motivation for https://lists.gnu.org/archive/html/bug-bash/2023-03/msg00062.html
Re: [PATCH] Save more readline state when running compgen
On Tue, Mar 21, 2023 at 5:52 PM alex xmb ratchev wrote: > On Tue, Mar 21, 2023, 22:42 Grisha Levit wrote: >> Let's say you want to know if there are any entries starting with >> `foo' in the current directory. You can do: > > i see , thank you sir > i .. mostly dont have that case ( if existing n no more ) > i always along the paths , to be used The most common use case would probably be to see if a directory is empty or not, like: compgen -G 'dir/@(*|.*)' >/dev/null || echo 'dir is empty' I don't see it mentioned in https://mywiki.wooledge.org/BashFAQ/004 but I think it's nice because you don't need subshells and don't have to fiddle with shopt, etc.
Re: parsing command substitution inside parameter expansion in interactive shell
On Mon, Mar 20, 2023 at 4:59 PM Chet Ramey wrote: > Thanks, it's an easy fix to preserve the newline here. FWIW even with the latest fixes, this kind of nesting in a history entry still triggers ASAN: bash --norc -in <<<$'${_+$(\n \cP\en ' ERROR: AddressSanitizer: heap-buffer-overflow READ of size 1 at 0x000105c0c2cf thread T0 #0 bash_add_history bashhist.c:898 #1 maybe_add_history bashhist.c:759 #2 pre_process_line bashhist.c:628 #3 shell_getc parse.y:2501 #4 read_token parse.y:3425 #5 yylex parse.y:2915 #6 yyparse y.tab.c:1869 #7 parse_comsub parse.y:4306 #8 parse_matched_pair parse.y:3973 #9 read_token_word parse.y:5172 #10 read_token parse.y:3621 #11 yylex parse.y:2915 #12 yyparse y.tab.c:1869 #13 parse_command eval.c:345 #14 read_command eval.c:389 #15 reader_loop eval.c:139 #16 main shell.c:821 Avoided by: diff --git a/bashhist.c b/bashhist.c index 21c77058..b48306c2 100644 --- a/bashhist.c +++ b/bashhist.c @@ -895,7 +895,7 @@ bash_add_history (char *line) newline, since that is what happens when the line is parsed. */ curlen = strlen (current->line); - if (dstack.delimiter_depth == 0 && current->line[curlen - 1] == '\\' && + if (curlen && dstack.delimiter_depth == 0 && current->line[curlen - 1] == '\\' && (curlen < 2 || current->line[curlen - 2] != '\\')) { current->line[curlen - 1] = '\0'; bash --norc -in <<<$'${_+$(\n\e.' ERROR: AddressSanitizer: heap-buffer-overflow READ of size 1 at 0x00010870c517 thread T0 #0 history_tokenize_word histexpand.c:1561 #1 history_tokenize_internal histexpand.c:1660 #2 history_tokenize histexpand.c:1693 #3 history_arg_extract histexpand.c:1435 #4 rl_yank_nth_arg_internal kill.c:628 #5 rl_yank_last_arg kill.c:698 #6 _rl_dispatch_subseq readline.c:922 Avoided by: diff --git a/lib/readline/histexpand.c b/lib/readline/histexpand.c index d21240bf..8a147e40 100644 --- a/lib/readline/histexpand.c +++ b/lib/readline/histexpand.c @@ -1603,6 +1603,8 @@ get_word: delimopen = '('; delimiter = ')'; nestdelim = 1; + if (string[i] == 0) +break; continue; }
segfault in hostnames_matching
another size_t issue diff --git a/bashline.c b/bashline.c index 0047caef..9df26d2e 100644 --- a/bashline.c +++ b/bashline.c @@ -919,7 +919,7 @@ hostnames_matching (const char *text) continue; /* OK, it matches. Add it to the list. */ - if (nmatch >= (rsize - 1)) + if ((nmatch + 1) >= rsize) { rsize = (rsize + 16) - (rsize % 16); result = strvec_resize (result, rsize);
size_t issue in expand_string_dollar_quote
bash --norc -in <<<$'"\e\cE' ERROR: AddressSanitizer: negative-size-param: (size=-1) #0 wrap_strncpy+0x228 #1 expand_string_dollar_quote subst.c:4108 #2 shell_expand_line bashline.c:2887 probably not the cleanest fix but the issue is here: diff --git a/subst.c b/subst.c index 2ff9b7c2..35c0fdd1 100644 --- a/subst.c +++ b/subst.c @@ -4100,7 +4100,7 @@ expand_string_dollar_quote (const char *string, int flags) news = skip_single_quoted (string, slen, ++sindex, SX_COMPLETE); else news = skip_double_quoted (string, slen, ++sindex, SX_COMPLETE); - translen = news - sindex - 1; + translen = (news > sindex) ? news - sindex - 1 : 0; RESIZE_MALLOCED_BUFFER (ret, retind, translen + 3, retsize, 64); ret[retind++] = c; if (translen > 0)
[PATCH] fix bcopy params
--- lib/sh/oslib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/sh/oslib.c b/lib/sh/oslib.c index ab7924df..85f7b491 100644 --- a/lib/sh/oslib.c +++ b/lib/sh/oslib.c @@ -161,7 +161,7 @@ getdtablesize (void) # undef bcopy # endif void -bcopy (void *s, *d, size_t n) +bcopy (void *s, void *d, size_t n) { FASTCOPY (s, d, n); } -- 2.40.0
bug-bash@gnu.org
If $var is non-empty and not a valid a number, {var}>&- silently closes fd 0. var=x; (exec {var}>&-; test -e /dev/fd/0); echo $? 1 Seems like the test of the return value of legal_number is incorrect. --- redir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redir.c b/redir.c index 804d9b82..267a9fc8 100644 --- a/redir.c +++ b/redir.c @@ -1459,7 +1459,7 @@ redir_varvalue (REDIRECT *redir) if (val == 0 || *val == 0) return -1; - if (legal_number (val, &vmax) < 0) + if (legal_number (val, &vmax) <= 0) return -1; i = vmax; /* integer truncation */ -- 2.40.0
parameter expansion assignment to array index nameref
$ declare -n ref=arr[1] $ arr=(A); (echo "${ref=X} ${ref}") A X $ arr=( ); (echo "${ref=X} ${ref}") Segmentation fault: 11 Expanding the nameref while still in parameter_brace_expand seems to do the right thing though I can't say I'm fully confident in this. diff --git a/subst.c b/subst.c index 555d18db..64f4d6da 100644 --- a/subst.c +++ b/subst.c @@ -9260,6 +9260,7 @@ parameter_brace_expand (char *string, int *indexp, int quoted, int pflags, int * int want_substring, want_indir, want_patsub, want_casemod, want_attributes; char *name, *value, *temp, *temp1; WORD_DESC *tdesc, *ret; + SHELL_VAR *var; int t_index, sindex, c, tflag, modspec, local_pflags, all_element_arrayref; intmax_t number; array_eltstate_t es; @@ -9583,6 +9584,13 @@ parameter_brace_expand (char *string, int *indexp, int quoted, int pflags, int * return (temp == &expand_param_error ? &expand_wdesc_error : &expand_wdesc_fatal); } + if ((var = find_variable_last_nameref (name, 0)) && nameref_p (var) && + (temp1 = nameref_cell (var)) && *temp1) +{ + FREE (name); + name = savestring (temp1); +} + #if defined (ARRAY_VARS) if (valid_array_reference (name, 0)) {
[PATCH] compgen -V option (store output in array)
Since the predominant use case for compgen is generating output that then gets split back up into an array, it seems like it would be nice to have an option that avoids the extra steps (and their associated pitfalls) From e75306bd43d4ed233a7fedddb060a6bdd9305fdc Mon Sep 17 00:00:00 2001 From: Grisha Levit Date: Thu, 13 Apr 2023 04:14:18 -0400 Subject: [PATCH] compgen -V option --- builtins/complete.def| 72 externs.h| 1 + lib/readline/doc/rluser.texi | 11 -- 3 files changed, 64 insertions(+), 20 deletions(-) diff --git a/builtins/complete.def b/builtins/complete.def index 881c4711..0c68aec1 100644 --- a/builtins/complete.def +++ b/builtins/complete.def @@ -87,7 +87,7 @@ struct _optflags { static int find_compact (char *); static int find_compopt (char *); -static int build_actions (WORD_LIST *, struct _optflags *, unsigned long *, unsigned long *); +static int build_actions (WORD_LIST *, struct _optflags *, unsigned long *, unsigned long *, char **); static int remove_cmd_completions (WORD_LIST *); @@ -177,9 +177,10 @@ find_compopt (char *name) /* Build the actions and compspec options from the options specified in LIST. ACTP is a pointer to an unsigned long in which to place the bitmap of actions. OPTP is a pointer to an unsigned long in which to place the - bitmap of compspec options (arguments to `-o'). PP, if non-null, gets 1 - if -p is supplied; RP, if non-null, gets 1 if -r is supplied. - If either is null, the corresponding option generates an error. + bitmap of compspec options (arguments to `-o'). FLAGP is a pointer to a + struct that stores non-action binary options. VNAMEP is a pointer to a + string to store the name of the output variable. + If any of the above is null, the corresponding option generates an error. This also sets variables corresponding to options that take arguments as a side effect; the caller should ensure that those variables are set to NULL before calling build_actions. Return value: @@ -189,7 +190,7 @@ find_compopt (char *name) */ static int -build_actions (WORD_LIST *list, struct _optflags *flagp, unsigned long *actp, unsigned long *optp) +build_actions (WORD_LIST *list, struct _optflags *flagp, unsigned long *actp, unsigned long *optp, char **vnamep) { int opt, ind, opt_given; unsigned long acts, copts; @@ -199,7 +200,7 @@ build_actions (WORD_LIST *list, struct _optflags *flagp, unsigned long *actp, un opt_given = 0; reset_internal_getopt (); - while ((opt = internal_getopt (list, "abcdefgjko:prsuvA:G:W:P:S:X:F:C:DEI")) != -1) + while ((opt = internal_getopt (list, "abcdefgjko:prsuvA:G:W:P:S:X:F:C:V:DEI")) != -1) { opt_given = 1; switch (opt) @@ -341,6 +342,25 @@ build_actions (WORD_LIST *list, struct _optflags *flagp, unsigned long *actp, un case 'S': Sarg = list_optarg; break; +case 'V': +#if defined (ARRAY_VARS) + if (vnamep) + { + *vnamep = list_optarg; + if (legal_identifier (*vnamep) == 0) + { + sh_invalidid (*vnamep); + return (EX_USAGE); + } + } + else +#endif + { + sh_invalidopt ("-V"); + builtin_usage (); + return (EX_USAGE); + } + break; case 'W': Warg = list_optarg; break; @@ -385,7 +405,7 @@ complete_builtin (WORD_LIST *list) /* Build the actions from the arguments. Also sets the [A-Z]arg variables as a side effect if they are supplied as options. */ - rval = build_actions (list, &oflags, &acts, &copts); + rval = build_actions (list, &oflags, &acts, &copts, (char **)NULL); if (rval == EX_USAGE) return (rval); opt_given = rval != EXECUTION_FAILURE; @@ -630,12 +650,13 @@ print_cmd_completions (WORD_LIST *list) $BUILTIN compgen $DEPENDS_ON PROGRAMMABLE_COMPLETION $FUNCTION compgen_builtin -$SHORT_DOC compgen [-abcdefgjksuv] [-o option] [-A action] [-G globpat] [-W wordlist] [-F function] [-C command] [-X filterpat] [-P prefix] [-S suffix] [word] +$SHORT_DOC compgen [-abcdefgjksuv] [-o option] [-A action] [-G globpat] [-W wordlist] [-F function] [-C command] [-X filterpat] [-P prefix] [-S suffix] [-V var] [word] Display possible completions depending on the options. Intended to be used from within a shell function generating possible completions. If the optional WORD argument is supplied, matches against -WORD are generated. +WORD are generated. With the -V option, completions are stored in the +indexed array VAR. Exit Status: Returns success unless an invalid option is supplied or an error occurs. @@ -651,17 +672,21 @@ compgen_builtin (WORD_LIST *list) char *word, **matches; char *old_line; int old_ind, old_completion, old_quoting, old_suppress; + char *varname; + SHELL_VAR *var; + WORD_LIST *alist; if (list == 0) return (EXECUTION_SUCCESS); acts
[PATCH] support getconf builtin build without confstr
Currently, the getconf build fails on platforms (e.g. Android) without confstr(3) From c1a616fd5ff18a202ad934bbb23ba4d58685 Mon Sep 17 00:00:00 2001 From: Grisha Levit Date: Thu, 13 Apr 2023 04:32:04 -0400 Subject: [PATCH] support getconf builtin build without confstr --- examples/loadables/getconf.c | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/examples/loadables/getconf.c b/examples/loadables/getconf.c index 0dc213be..cd89b82f 100644 --- a/examples/loadables/getconf.c +++ b/examples/loadables/getconf.c @@ -47,7 +47,15 @@ struct conf { const char *name; const long call_name; /* or value for CONSTANT */ -const enum { SYSCONF, CONFSTR, PATHCONF, CONSTANT, UNDEFINED } call; +const enum { +SYSCONF, +#if HAVE_CONFSTR +CONFSTR, +#endif +PATHCONF, +CONSTANT, +UNDEFINED +} call; }; static const struct conf vars[] = @@ -489,8 +497,10 @@ static const struct conf vars[] = { "POSIX2_UPE", _SC_2_UPE, SYSCONF }, { "POSIX2_VERSION", _SC_2_VERSION, SYSCONF }, +#ifdef _CS_PATH { "PATH", _CS_PATH, CONFSTR }, { "CS_PATH", _CS_PATH, CONFSTR }, +#endif /* LFS */ #ifdef _CS_LFS_CFLAGS @@ -1072,6 +1082,7 @@ getconf_print (const struct conf *c, const char *vpath, int all) printf ("%ld\n", value); return (EXECUTION_SUCCESS); +#if HAVE_CONFSTR case CONFSTR: errno = 0; clen = confstr (cn, (char *) NULL, 0); @@ -1092,6 +1103,7 @@ getconf_print (const struct conf *c, const char *vpath, int all) printf ("%.*s\n", (int) clen, cvalue); free (cvalue); return (EXECUTION_SUCCESS); +#endif case CONSTANT: return (getconf_internal (c, all)); -- 2.40.0
[PATCH] allow quoting completions w/o filename treatment
Currently, there isn't any way to request quoting of completion matches without getting also the attendant filename treatment. In practice, authors of completion functions that need the matches to be quoted often use `-o filenames', which works fine unless the matches include slashes or happen to match existing directory names. Alternatively, some authors opt to do their own quoting of matches but doing so properly for partial word completion is tricky as it requires accounting for the quoting style chosen by the user when they typed the partial word. The attached patch hopefully addresses these difficulties by allowing for the decoupling of filename-specific match handling from match quoting. It adds a new completion option `allquote' (I'm not quite happy with the name) which controls the corresponding new readline variable rl_all_quoting_desired. From 8049e9f920fde4f84422fd651ba4a4beeafb7f10 Mon Sep 17 00:00:00 2001 From: Grisha Levit Date: Fri, 14 Apr 2023 16:10:22 -0400 Subject: [PATCH] allow quoting completions w/o filename treatment lib/readline/complete.c - rl_all_quoting_desired: new variable to tell readline to quote matches even if not doing filename completion - QUOTING_DESIRED: new macro for checking if quoting is enabled - change checks of filename comoletion && filename quoting to use QUOTING_DESIRED. - set_completion_defaults: set rl_all_quoting_desired to 0 lib/readline/readline.h - rl_all_quoting_desired: extern declaration lib/readline/doc/rltech.texi - document rl_all_quoting_desired pcomplete.h - COPT_ALLQUOTE: new value for compspec option, corresponds to rl_all_quoting_desired pcomplete.c - pcomp_set_readline_variables: handle COPT_ALLQUOTE builtins/complete.def - compopts: add allquote option for COPT_ALLQUOTE lib/readline/doc/rluser.texi - document allquote compopt --- builtins/complete.def| 1 + lib/readline/complete.c | 26 +- lib/readline/doc/rltech.texi | 7 +++ lib/readline/doc/rluser.texi | 3 +++ lib/readline/readline.h | 6 ++ pcomplete.c | 2 ++ pcomplete.h | 3 ++- 7 files changed, 34 insertions(+), 14 deletions(-) diff --git a/builtins/complete.def b/builtins/complete.def index 881c4711..89ce0726 100644 --- a/builtins/complete.def +++ b/builtins/complete.def @@ -141,6 +141,7 @@ static const struct _compopt { const char * const optname; unsigned long optflag; } compopts[] = { + { "allquote", COPT_ALLQUOTE }, { "bashdefault", COPT_BASHDEFAULT }, { "default", COPT_DEFAULT }, { "dirnames", COPT_DIRNAMES }, diff --git a/lib/readline/complete.c b/lib/readline/complete.c index bf7cc856..3cf050fc 100644 --- a/lib/readline/complete.c +++ b/lib/readline/complete.c @@ -336,6 +336,14 @@ int rl_filename_completion_desired = 0; entry finder function. */ int rl_filename_quoting_desired = 1; +/* Non-zero means that we should apply filename quoting to the matches + even if we are not otherwise treating the matches as filenames. This + is ALWAYS zero on entry, and can only be changed within a completion + entry finder function. */ +int rl_all_quoting_desired = 0; + +#define QUOTING_DESIRED (rl_all_quoting_desired || (rl_filename_completion_desired && rl_filename_quoting_desired)) + /* This function, if defined, is called by the completer when real filename completion is done, after all the matching names have been generated. It is passed a (char**) known as matches in the code below. @@ -510,7 +518,7 @@ static void set_completion_defaults (int what_to_do) { /* Only the completion entry function can change these. */ - rl_filename_completion_desired = 0; + rl_filename_completion_desired = rl_all_quoting_desired = 0; rl_filename_quoting_desired = 1; rl_completion_type = what_to_do; rl_completion_suppress_append = rl_completion_suppress_quote = 0; @@ -1408,10 +1416,7 @@ compute_lcd_of_matches (char **match_list, int matches, const char *text) check against the list of matches FI */ dtext = (char *)NULL; - if (rl_filename_completion_desired && - rl_filename_dequoting_function && - rl_completion_found_quote && - rl_filename_quoting_desired) + if (QUOTING_DESIRED && rl_completion_found_quote && rl_filename_dequoting_function) { dtext = (*rl_filename_dequoting_function) ((char *)text, rl_completion_quote_character); text = dtext; @@ -1766,10 +1771,7 @@ make_quoted_replacement (char *match, int mtype, char *qc) matches don't require a quoted substring. */ replacement = match; - should_quote = match && rl_completer_quote_characters && - rl_filename_completion_desired && - rl_filename_quoting_desired; - + should_quote = match && rl_completer_quote
Prompt save/restore for vi movement argument
Originally identified in [1] (FWIW, I only see the message in the archive, not in my inbox). [1]: https://lists.gnu.org/archive/html/bug-bash/2023-04/msg00043.html bash --norc -o vi -in <<<$'\ec1234567' (arg: 123456) Segmentation fault I think it's just a matter of save/restore_prompt diff --git a/lib/readline/vi_mode.c b/lib/readline/vi_mode.c index 7396e316..ddd8fd95 100644 --- a/lib/readline/vi_mode.c +++ b/lib/readline/vi_mode.c @@ -1090,6 +1090,7 @@ _rl_vi_arg_dispatch (int c) } else { + rl_restore_prompt (); rl_clear_message (); rl_stuff_char (key); return 0; /* done */ @@ -1331,6 +1332,7 @@ rl_domove_read_callback (_rl_vimotion_cxt *m) rl_numeric_arg = _rl_digit_value (c); rl_explicit_arg = 1; RL_SETSTATE (RL_STATE_NUMERICARG); + rl_save_prompt (); rl_digit_loop1 (); rl_numeric_arg *= save; c = rl_vi_domove_getchar (m);
segfault on initial word completion
$ bash --rcfile <(echo 'complete -C: -I') -i <<<$'; \cA\t' bash-5.2$ bash: xmalloc: cannot allocate 18446744073709551615 bytes $ bash --rcfile <(echo 'complete -C: -I') -i <<<$';\cA \t' bash-5.2$ Segmentation fault: 11
SIGINT not breaking loop with non-list function body
If an interactive shell line is just a single loop and the body of the loop consists only of a function name, and the function body is not a list, ^C does not cause the loop to break as it otherwise would. $ f() { cat; } $ while :; do f; done ^C The code in wait_for() checks the values of loop_level and executing_list: jobs.c:5095:55 -> 5095 if (signal_is_trapped (SIGINT) == 0 && (loop_level || (shell_compatibility_level > 32 && executing_list))) 5096 ADDINTERRUPT; but loop_level is reset by execute_function(): execute_cmd.c:5171:5 5170 if (shell_compatibility_level > 43) -> 5171 loop_level = 0; This code in wait_for seems to be the only place where the value of executing_list is tested, so maybe changing that variable to something like executing_list_or_loop and adding in/decrements of it in the loop-related execute_* functions would make sense.
Re: segfault on initial word completion
On Wed, Apr 19, 2023, 10:23 Chet Ramey wrote: > On 4/18/23 6:15 PM, Grisha Levit wrote: > > $ bash --rcfile <(echo 'complete -C: -I') -i <<<$'; \cA\t' > > bash-5.2$ bash: xmalloc: cannot allocate 18446744073709551615 bytes > > $ bash --rcfile <(echo 'complete -C: -I') -i <<<$';\cA \t' > > bash-5.2$ Segmentation fault: 11 > > > > Thanks for the report. > One more issue after the fix, the start can end up being further than pcomp_ind bash --rcfile <(echo 'complete -C: -I') -i <<<$' \cA\t' ERROR: AddressSanitizer: heap-buffer-overflow READ of size 1 at 0x0044e7a7d22f thread T0 #0 0x63e8175148 in bind_compfunc_variables pcomplete.c:946:7 #1 0x63e816afb0 in gen_command_matches pcomplete.c:1179:3 #2 0x63e8165358 in gen_compspec_completions pcomplete.c:1389:18 #3 0x63e81700e0 in gen_progcomp_completions pcomplete.c:1539:9 #4 0x63e816eb7c in programmable_completions pcomplete.c:1592:13 #5 0x63e810b4b8 in attempt_shell_completion bashline.c:1745:26
heap-buffer-overflow in history_expand
The history expansion code can end up reading past the end of the input line buffer if the line ends with an invalid multibyte sequence: bash --norc -in <<<$'X\n\e238Y!!\xC2\xC2' ERROR: AddressSanitizer: heap-buffer-overflow READ of size 1 at 0x000108b48400 thread T0 #0 0x104ed9c88 in history_expand histexpand.c:1129 #1 0x104b761b0 in pre_process_line bashhist.c:570 #2 0x10482a540 in shell_getc parse.y:2512 diff --git a/lib/readline/histexpand.c b/lib/readline/histexpand.c index db344b49..425ea7cf 100644 --- a/lib/readline/histexpand.c +++ b/lib/readline/histexpand.c @@ -1121,7 +1121,7 @@ history_expand (const char *hstring, char **output) c = tchar; memset (mb, 0, sizeof (mb)); - for (k = 0; k < MB_LEN_MAX; k++) + for (k = 0; k < MB_LEN_MAX && i < l; k++) { mb[k] = (char)c; memset (&ps, 0, sizeof (mbstate_t));
heap-use-after-free in rl_do_undo
This segfaults in a non-ASAN build: HISTFILE= INPUTRC=<(echo '"F": history-substring-search-forward') \ bash --norc -in <<<$'.\n..\n\cP\cT\cPF\cN\cN.\cPF\c_' ERROR: AddressSanitizer: heap-use-after-free on address 0x0001060082a8 READ of size 4 at 0x0001060082a8 thread T0 #0 0x1027627b8 in rl_do_undo undo.c:188 #1 0x102764b38 in rl_undo_command undo.c:358 #2 0x102661904 in _rl_dispatch_subseq readline.c:922 0x0001060082a8 is located 24 bytes inside of 32-byte region [0x000106008290,0x0001060082b0) freed by thread T0 here: #0 0x102f6afa4 in wrap_free+0x98 #1 0x1024c8648 in xfree xmalloc.c:140 #2 0x102761834 in _rl_free_undo_list undo.c:111 #3 0x10278fbcc in _rl_free_saved_history_line misc.c:404 #4 0x10269aed8 in rl_history_search_reinit search.c:637 #5 0x10269bec0 in rl_history_substr_search_forward search.c:688 #6 0x102661904 in _rl_dispatch_subseq readline.c:922 previously allocated by thread T0 here: #0 0x102f6ae68 in wrap_malloc+0x94 #1 0x1024c84ec in xmalloc xmalloc.c:107 #2 0x102761088 in alloc_undo_entry undo.c:75 #3 0x102760f60 in rl_add_undo undo.c:92 #4 0x102779198 in rl_insert_text text.c:113 #5 0x102781710 in _rl_insert_char text.c:903 #6 0x102782664 in rl_insert text.c:955 #7 0x102661904 in _rl_dispatch_subseq readline.c:922
recovering from parser errors during compound assignment
If a parser error is encountered when `local' is parsing an array assignment string, the installed unwind-protects never run: $ f() { local -a a='(())'; }; f bash: syntax error near unexpected token `(' $ declare -p a FUNCNAME declare -a a declare -a FUNCNAME=([0]="f") If there's a PROMPT_COMMAND set and a parser error is encountered during a compound assignment, the alias expansion state is not properly restored: $ PROMPT_COMMAND=:; alias A=: $ declare a=(()) bash: syntax error near unexpected token `(' $ A bash: A: command not found
EOF at PS2
A few issues with EOF being received at PS2: The setting of ignoreeof is ignored at PS2: bash --norc -o ignoreeof $ uname \ > ^D Linux (bash exits) If the previous line didn't terminate the current token, ^D causes the token (rather than the command) to be terminated and a new PS2 to be printed. The history code treats them as one word. bash --norc $ echo A\ > ^D > B A B $ !! echo AB AB And perhaps most surprising, if PS0 contains a command substitution, the wrong prompts are shown: PS0='$(:)' bash --norc $ uname \ > ^DLinux # The command is executed (without a newline printed) > uname \ # PS2 is displayed again, but we are actually reading a new cmd $ -n# PS1 is used for the continuation line localhost
Re: heap-use-after-free in rl_do_undo
On Wed, Apr 26, 2023 at 2:15 PM Chet Ramey wrote: > This one looks like it was already hit by the other guy sending in fuzzing > output. You're right, sorry I missed that.
Re: heap-buffer-overflow in history_expand
On Fri, Apr 28, 2023, 11:35 Chet Ramey wrote: > On 4/24/23 1:40 AM, Grisha Levit wrote: > > The history expansion code can end up reading past the end of the > > input line buffer if the line ends with an invalid multibyte sequence: > > Thanks for the report. You mean an incomplete multibyte character, I think. > Well I'm not quite sure. The (piped) input needs to have an invalid sequence (two leading bytes) but readline transforms this invalid sequence into a just a single leading byte. Piping input that simply ends in an leading byte doesn't trigger the issue -- that byte byte don't seem to make it into the input line. This is a bit off topic, but I don't really understand what happens with invalid input sequences in the input, see e.g.: $ bash --norc -i 2>/dev/null <<<$'printf %qn \240\340' $'\240' $ bash --norc -i 2>/dev/null <<<$'printf %qn \240\340.' $'\240.' $ bash --norc -i 2>/dev/null <<<$'printf %qn \240\340.\341' $'\240.\340' (Especially the last one where the 2nd and 3rd bytes of the string are reversed)
Re: heap-buffer-overflow in history_expand
On Sat, Apr 29, 2023, 14:02 Chet Ramey wrote: > On 4/28/23 9:28 PM, Grisha Levit wrote: > > Piping input that simply ends in an leading byte doesn't trigger the > issue > > -- that byte byte don't seem to make it into the input line. > > > > This is a bit off topic, but I don't really understand what happens with > > invalid input sequences in the input, see e.g.: > > They should be treated as individual bytes. > I think I see what's happening now. Readline accumulates the bytes until a complete character is read. However, this buffer is not flushed when the reading of a multibyte character is interrupted by inserting a single byte character, or by any non-insertion command. So for example, the \317 byte never gets a chance to be inserted here: bash --norc -in <<<$':\317:' $ :: And inserting the byte is deferred until the next byte with the 8th bit set is read (which can be at some arbitrary future time) here: bash --norc -in <<<$':\317\n: \200' $ : $ : π You can also reproduce interactively by binding the above input to a macro. Attached is a patch that I think should address this. From d7d1043e2bc2ad35ac2ef6a2cc45467085cf5835 Mon Sep 17 00:00:00 2001 From: Grisha Levit Date: Fri, 28 Apr 2023 23:24:34 -0400 Subject: [PATCH] fix invalid mb insert lib/readline/text.c - _rl_insert_char: return 1 when there are any pending bytes - _rl_insert_char: treat being called with a count of 0 as a signal to insert any pending bytes read so far. - rl_insert, rl_quoted_insert: if the final _rl_insert_char returns 1, call _rl_insert_char with a count of 0 to insert any pending bytes. --- lib/readline/text.c | 68 +++-- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/lib/readline/text.c b/lib/readline/text.c index 356cac5f..27d9292b 100644 --- a/lib/readline/text.c +++ b/lib/readline/text.c @@ -704,7 +704,8 @@ static mbstate_t ps = {0}; /* Insert the character C at the current location, moving point forward. If C introduces a multibyte sequence, we read the whole sequence and - then insert the multibyte char into the line buffer. */ + then insert the multibyte char into the line buffer. + If COUNT is 0, just insert any accumulated multibyte sequence as is. */ int _rl_insert_char (int count, int c) { @@ -718,11 +719,32 @@ _rl_insert_char (int count, int c) static int stored_count = 0; #endif +#if !defined (HANDLE_MULTIBYTE) if (count <= 0) return 0; +#else + if (count < 0) +return 0; + if (count == 0) +{ + /* If COUNT is 0, we were called because we are no longer reading + a multibyte character. If we were waiting to complete a character, +just insert any bytes we have read so far. */ + if (pending_bytes_length == 0) + return 0; -#if defined (HANDLE_MULTIBYTE) - if (MB_CUR_MAX == 1 || rl_byte_oriented) + if (stored_count <= 0) + stored_count = count; + else + count = stored_count; + + memcpy (incoming, pending_bytes, pending_bytes_length); + incoming[pending_bytes_length] = '\0'; + incoming_length = pending_bytes_length; + pending_bytes_length = 0; + memset (&ps, 0, sizeof (mbstate_t)); +} + else if (MB_CUR_MAX == 1 || rl_byte_oriented) { incoming[0] = c; incoming[1] = '\0'; @@ -730,6 +752,9 @@ _rl_insert_char (int count, int c) } else if (_rl_utf8locale && (c & 0x80) == 0) { + if (pending_bytes_length) +_rl_insert_char (0, 0); + incoming[0] = c; incoming[1] = '\0'; incoming_length = 1; @@ -827,7 +852,11 @@ _rl_insert_char (int count, int c) rl_insert_text (string); xfree (string); +#if defined (HANDLE_MULTIBYTE) + return pending_bytes_length ? 1 : 0; +#else return 0; +#endif } if (count > TEXT_COUNT_MAX) @@ -860,6 +889,8 @@ _rl_insert_char (int count, int c) xfree (string); incoming_length = 0; stored_count = 0; + + return pending_bytes_length ? 1 : 0; #else /* !HANDLE_MULTIBYTE */ char str[TEXT_COUNT_MAX+1]; @@ -873,9 +904,9 @@ _rl_insert_char (int count, int c) rl_insert_text (str); count -= decreaser; } -#endif /* !HANDLE_MULTIBYTE */ return 0; +#endif /* !HANDLE_MULTIBYTE */ } if (MB_CUR_MAX == 1 || rl_byte_oriented) @@ -903,9 +934,12 @@ _rl_insert_char (int count, int c) rl_insert_text (incoming); stored_count = 0; } -#endif + return pending_bytes_length ? 1 : 0; +#else /* !HANDLE_MULTIBYTE */ return 0; +#endif /* !HANDLE_MULTIBYTE */ + } /* Overwrite the character at point (or next COUNT characters) with C. @@ -955,7 +989,7 @@ rl_insert (int count, int c) r = (rl_insert_mode == RL_IM_INSERT) ? _rl_insert_char (count, c) : _rl_overwrite_char (count, c);
Re: nofork command substitution
This will be a very neat feature to have. One thing I'm excited about is the ability to cleanly perform assignments during prompt expansion (e.g. to capture information during PS0 or PS4 expansion). Something I suspect people will complain about is that this change makes some polyglot zsh/bash scripts that were previously syntactically valid no longer so. A particularly prominent one is git-completion.bash [1] distributed with git, which now reports syntax errors when sourced due to the expansion on line 414: 413if [[ -n ${ZSH_VERSION-} ]]; then 414unset ${(M)${(k)parameters[@]}:#__gitcomp_builtin_*} 2>/dev/null 415else 416unset $(compgen -v __gitcomp_builtin_) 417fi Hopefully, there are not too many such scripts and they can just gain some workarounds (like wrapping the zsh-only code in an eval). [1]: https://github.com/git/git/blob/v2.40.1/contrib/completion/git-completion.bash#L414
Re: parsing command substitution inside parameter expansion in interactive shell
On Mon, Mar 20, 2023 at 4:59 PM Chet Ramey wrote: > Thanks for the report. This is the same thing as > > https://lists.gnu.org/archive/html/bug-bash/2021-06/msg00115.html > > with the command substitution being embedded in the parameter expansion. One more similar case when the parameter expansion is quoted: bash --norc -in <<<$'"${_+$("\n")}"' bash-5.2$ "${_+$(" bash: command substitution: line 2: unexpected EOF while looking for matching `"' > ")}" bash --norc -in <<<$'"${_+$(:\n)}"' bash-5.2$ "${_+$(: bash: command substitution: line 3: unexpected EOF while looking for matching `)' > )}"
Re: nofork command substitution
An interactive shell gets confused (PS1 shown instead of PS2) when using newline as the first character of the substitution: $ bash --norc -i <<<$'${\n:;}' bash-5.2$ ${ bash-5.2$ :;} Things don't seem to work right with an empty funsub: bash --pretty-print <<<'${ }' ${ ; } $ bash -c '${ }' ERROR: AddressSanitizer: heap-buffer-overflow READ of size 1 at 0x005af9470f1f thread T0 #0 0x56ec06e390 in parse_comsub parse.y:4462:15 #1 0x56ec058344 in read_token_word parse.y:5294:16 #2 0x56ec04c144 in read_token parse.y:3642:12 I'm not sure if it makes sense to accept `${ }' (like `$( )') or not (like `{ }'). FWIW, mksh accepts both.
Unset during expansion w/ nofork sub
There are a lot of code paths that (reasonably) do not expect a variable to disappear while being expanded/assigned to, and usually cause a segfault if that happens (as is now possible with the new nofork command substitution). I don't think there is any legitimate use case to be supported here, and I can't really think of a sensible way to avoid this without adding a bunch of overhead. Hypothetically, bash could perform another variable lookup to confirm after expanding the other words in the PE, but this might have to deal with issues like array types changing when a new variable is brought into scope, etc. A similar situation is already ignored in mapfile's callback (which can unset the array being populated) and it seems fine to me to ignore these too, but posting in case others have better ideas. a=(x); a=(${ unset a; }) heap-use-after-free arrayfunc.c:667 in assign_compound_array_list a=(x); : ${a[${ unset a; }]} heap-use-after-free arrayfunc.c:1499 in array_value_internal a=(x); : ${#a[${ unset a; }]} heap-use-after-free subst.c:7378 in array_length_reference a=(x); : ${a[@]/${ unset a; }} heap-use-after-free subst.c:9331 in parameter_brace_patsub a=(x); : ${a[@]:${ unset a; }} heap-use-after-free subst.c:8305:11 in verify_substring_values a=(x); : ${a[@]:0:${ unset a; }} heap-use-after-free subst.c:8964:16 in parameter_brace_substring declare -A A; A=(x ${ unset A; }) heap-use-after-free variables.c:2859:36 in make_variable_value
Re: heap-buffer-overflow in history_expand
On Mon, May 1, 2023 at 11:48 AM Chet Ramey wrote: > Yes, I concluded the same thing. Thanks for the patch. I have one question > about the change to rl_insert: why overwrite any return value from the > initial call to _rl_insert_char by setting r back to 0? What if the initial > value of C starts an incomplete multibyte character, and is then followed > by a character that doesn't contribute? You're right, that was a mistake, I missed the presence of that initial _rl_insert_char call. I noticed a couple of other bits missing from the patch as applied though. (The first because pending_bytes_length is not defined without HANDLE_MULTIBYTE, the second to have quoted insert work without a negative argument). --- diff --git a/lib/readline/text.c b/lib/readline/text.c index b07ff470..e3e5bb9e 100644 --- a/lib/readline/text.c +++ b/lib/readline/text.c @@ -853,7 +853,11 @@ _rl_insert_char (int count, int c) rl_insert_text (string); xfree (string); +#if defined (HANDLE_MULTIBYTE) return (pending_bytes_length != 0); +#else + return 0; +#endif } if (count > TEXT_COUNT_MAX) @@ -1112,6 +1116,8 @@ rl_quoted_insert (int count, int key) r = _rl_insert_next (1); while (r == 0 && ++count < 0); } + else +r = _rl_insert_next (count); if (r == 1) _rl_insert_char (0, 0);/* insert partial multibyte character */
\U expansion in single-byte locale
If expanding a \u (or \U) escape sequence fails, Bash replaces the input escape sequence with a newly generated one: $ LC_ALL=C printf %b \\U80 \u0080 Since this new sequence may by longer than the input, it can cause an overflow in printf: $ bash-asan -c 'LC_ALL=C printf %b \\U80' ERROR: AddressSanitizer: heap-buffer-overflow ... WRITE of size 1 at 0x0001062028f5 thread T0 #0 0x1031bed94 in bexpand printf.def:1180 #1 0x1031ad19c in printf_builtin printf.def:610 TBH I found this fallback behavior kind of surprising and was expecting the escape sequence to expand to either a null string or to itself (though some tools assume __STDC_ISO_10646__ behavior even without checking and expand to what may (or may not) be the correct codepoint).
ctype.h functions on bytes 0x80..0xFF
On Mon, May 1, 2023 at 11:48 AM Chet Ramey wrote: > > (And once we get these issues straightened out, if you look back to your > original example, 0x240 is a blank in my locale, en_US.UTF-8, and will be > removed from the input stream by the parser unless it's quoted.) On at least recent macos versions, it seems that the ctype.h functions treat [0x80..0xFF] the same as wctype.h functions would. So while U+00A0 is a space character in the en_US.UTF-8 locale, and iswspace(L'\u00A0') returns 1, it is also the case that isspace(0xA0) returns 1. But I don't think it's correct to actually rely on the latter since the single byte 0xA0 doesn't represent _any_ character in the locale, much less a space. (I think that's the reason for the behavior Chet noted above from a previous thread). For example, these outputs would be correct with \uA0 in place of \xA0 below, but I don't think the current behaviour is expected: $ eval $'printf "<%s>" [\xA0\xA0]' <[><]> [[ $'\xA0' == [[:space:]] ]]; echo $? 0 Perhaps on platforms like this it would be appropriate to mask ctype results with something equivalent to `btowc(c) != WEOF'? (See http://www.openradar.me/FB9973780 for an example of the issue in an apple-supplied program)
Re: ctype.h functions on bytes 0x80..0xFF
The below seems like a cheap fix for UTF-8 locales. Since Bash falls back to using the single-byte glob matching functions when presented with invalid multibyte strings, this patch makes the glob code avoid calling the ctype functions or strcoll when handling individual bytes >0x7F (in a UTF-8 locale). This makes the following no longer evaluate to true on macos: [[ $'\xC0' == [[:upper:]] ]] [[ $'\xC0' == [[=A=]] ]] [[ $'\xC0' == $'\xE0' ]] # with nocasematch And on Linux with glibc (tested on Ubuntu 22.04) in en_US.UTF-8, strcoll returns 0 for any two invalid bytes, so the following is also no longer true: x=$'\x80'; [[ $'\xC0' == [[=$x=]] ]] The locale_setblanks change is for the macos issue with 0xA0 being treated as a blank (as U+00A0). There's no other code that changes CSHBRK in sh_syntaxtab so I think the simplifications are OK. --- diff --git a/lib/glob/smatch.c b/lib/glob/smatch.c index 12eb9d27..1c6b0229 100644 --- a/lib/glob/smatch.c +++ b/lib/glob/smatch.c @@ -141,6 +141,9 @@ rangecmp (int c1, int c2, int forcecoll) static int collseqcmp (int c, int equiv) { + if (locale_utf8locale && (UTF8_SINGLEBYTE (c) == 0 || UTF8_SINGLEBYTE (equiv) == 0)) +return (c == equiv); + if (charcmp (c, equiv, 1) == 0) return 1; @@ -281,6 +284,9 @@ is_cclass (int c, const char *name) enum char_class char_class; int result; + if (locale_utf8locale && UTF8_SINGLEBYTE(c) == 0) +return -1; + char_class = is_valid_cclass (name); if (char_class == CC_NO_CLASS) return -1; @@ -291,7 +297,8 @@ is_cclass (int c, const char *name) /* Now include `sm_loop.c' for single-byte characters. */ /* The result of FOLD is an `unsigned char' */ -# define FOLD(c) ((flags & FNM_CASEFOLD) \ +# define FOLD(c) (((flags & FNM_CASEFOLD) && \ +(locale_utf8locale == 0 || UTF8_SINGLEBYTE (c))) \ ? TOLOWER ((unsigned char)c) \ : ((unsigned char)c)) diff --git a/locale.c b/locale.c index eb24a517..b918db37 100644 --- a/locale.c +++ b/locale.c @@ -584,15 +584,10 @@ locale_setblanks (void) for (x = 0; x < sh_syntabsiz; x++) { - if (isblank ((unsigned char)x)) - sh_syntaxtab[x] |= CSHBRK|CBLANK; - else if (member (x, shell_break_chars)) - { - sh_syntaxtab[x] |= CSHBRK; - sh_syntaxtab[x] &= ~CBLANK; - } + if ((locale_utf8locale == 0 || (x & 0x80) == 0) && isblank ((unsigned char)x)) + sh_syntaxtab[x] |= CBLANK; else - sh_syntaxtab[x] &= ~(CSHBRK|CBLANK); + sh_syntaxtab[x] &= ~CBLANK; } }
No form commsub if last token ends with & or ;
Missing final `;': "$BASH" --pretty-print <<< $'${ : \;;}' ${ : \; } "$BASH" --pretty-print <<< $'${ : \;\n}' ${ : \; } "$BASH" --pretty-print <<< $'${ : \&;}' ${ : \& } "$BASH" --pretty-print <<< $'${ : \&\n}' ${ : \& } Correct: "$BASH" --pretty-print <<< $'${ : \;&}' ${ : \; & } "$BASH" --pretty-print <<< $'${ : \&&}' ${ : \& & }
[PATCH] leak in rl_filename_completion_function
If rl_filename_rewrite_hook returns a new string for a filename (which I guess only happens on macOS with bash), it is in most cases not free-d. run() { for ((i=0; i<=1; i++)); do ((i%1000)) || ps -o rss= $BASHPID compgen -f . >/dev/null done } $ (run) 2160 4576 6864 9040 11232 13424 15584 17712 19856 22000 24144 --- diff --git a/lib/readline/complete.c b/lib/readline/complete.c index bf7cc856..99556a35 100644 --- a/lib/readline/complete.c +++ b/lib/readline/complete.c @@ -2521,6 +2521,9 @@ rl_filename_completion_function (const char *text, int state) convfn = dentry = 0; while (directory && (entry = readdir (directory))) { + if (convfn != dentry) +xfree (convfn); + convfn = dentry = entry->d_name; convlen = dentlen = D_NAMLEN (entry); @@ -2572,6 +2575,9 @@ rl_filename_completion_function (const char *text, int state) users_dirname = (char *)NULL; } + if (convfn != dentry) +xfree (convfn); + return (char *)NULL; } else
[PATCH] use-after-free in expand_string_dollar_quote
A use-after-free happens in expand_string_dollar_quote if noexpand_translation is enabled and a string's translation is the same length as the string itself. --- diff --git a/subst.c b/subst.c index 08d9285e..a7a386d4 100644 --- a/subst.c +++ b/subst.c @@ -4231,12 +4231,17 @@ expand_string_dollar_quote (const char *string, int flags) continue; } trans = locale_expand (t, 0, news-sindex, 0, &translen); - free (t); if (singlequote_translations && ((news-sindex-1) != translen || STREQN (t, trans, translen) == 0)) - t = sh_single_quote (trans); + { + free (t); + t = sh_single_quote (trans); + } else - t = sh_mkdoublequoted (trans, translen, 0); + { + free (t); + t = sh_mkdoublequoted (trans, translen, 0); + } sindex = news; } #endif /* TRANSLATABLE_STRINGS */
bug-bash@gnu.org
The command printing code can fail to add a required semicolon when the last word in the command ends with `&' $ bash --pretty-print <<<$'{ \&;}' { \& } $ f() { if echo \&; then :; fi; } $ declare -f f f () { if echo \& then :; fi } $ eval "$(declare -f f)" bash: syntax error near unexpected token `fi' --- diff --git a/print_cmd.c b/print_cmd.c index 29870837..961c8dae 100644 --- a/print_cmd.c +++ b/print_cmd.c @@ -1446,8 +1446,10 @@ indent (int amount) static void semicolon (void) { - if (command_string_index > 0 && - (the_printed_command[command_string_index - 1] == '&' || + if ((command_string_index > 1 && + the_printed_command[command_string_index - 2] == ' ' && + the_printed_command[command_string_index - 1] == '&') || + (command_string_index > 0 && the_printed_command[command_string_index - 1] == '\n')) return; cprintf (";")
[PATCH] fix compgen -V leak
My earlier patch for adding compgen -V did the variable assignment in a pretty silly way and had a small memory leak to boot. Hope this new way makes sense, sorry for the extra work. --- >From b6b13b89e1436ddd575483a81e79ef43d82a5c0c Mon Sep 17 00:00:00 2001 From: Grisha Levit Date: Sat, 3 Jun 2023 16:37:02 -0400 Subject: [PATCH] fixup compgen -V array handling * builtins/complete.def - compgen_builtin: skip the unnecessary WORD_LIST conversion, avoid small leak * externs.h - remove strlist_to_word_list which was only added for initial version of compgen -V support --- builtins/complete.def | 7 +-- externs.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/builtins/complete.def b/builtins/complete.def index 890cf20d..bb03c6e9 100644 --- a/builtins/complete.def +++ b/builtins/complete.def @@ -678,7 +678,6 @@ compgen_builtin (WORD_LIST *list) int old_ind, old_completion, old_quoting, old_suppress; SHELL_VAR *var; char *varname; - WORD_LIST *alist; if (list == 0) return (EXECUTION_SUCCESS); @@ -763,11 +762,7 @@ compgen_builtin (WORD_LIST *list) var = builtin_find_indexed_array (varname, 1); if (var && sl && sl->list && sl->list_len) { - alist = strlist_to_word_list (sl, 0, 0); - assign_array_var_from_word_list (var, alist, 0); - free (sl); - sl = (STRINGLIST *)NULL; - dispose_words (alist); + array_from_argv (array_cell(var), sl->list, sl->list_len); rval = EXECUTION_SUCCESS; } } diff --git a/externs.h b/externs.h index a1363d4d..fe5aa492 100644 --- a/externs.h +++ b/externs.h @@ -414,7 +414,6 @@ extern STRINGLIST *strlist_prefix_suffix (STRINGLIST *, const char *, const char extern void strlist_print (STRINGLIST *, const char *); extern void strlist_walk (STRINGLIST *, sh_strlist_map_func_t *); extern void strlist_sort (STRINGLIST *); -extern WORD_LIST *strlist_to_word_list (STRINGLIST *, int, int); /* declarations for functions defined in lib/sh/stringvec.c */ -- 2.41.0
[PATCH] uninitialized variable access
Some uninitialized variable access identified by clang's static analyzer. (FWIW 90% of the reports were bogus but these seem legit) * lib/readline/input.c - rl_gather_tyi: the `result' variable is no longer initialized before first access since commit d0bc56a32 * lib/readline/kill.c - _rl_read_bracketed_paste_prefix: if there's no further input after the initial \e of the bracketed paste prefix, an uninitialized value of `key' can get used in the return value * subst.c - function_substitute: seems like the `tflag' assignment ended up in the wrong place? * builtins/read.def - read_builtin: with `read -n0', the initialization of saw_escape is goto-d over but that variable is later accessed --- diff --git a/lib/readline/input.c b/lib/readline/input.c index 229474ff..00605834 100644 --- a/lib/readline/input.c +++ b/lib/readline/input.c @@ -252,6 +252,7 @@ rl_gather_tyi (void) chars_avail = 0; input = 0; tty = fileno (rl_instream); + result = -1; /* Move this up here to give it first shot, but it can't set chars_avail */ /* XXX - need rl_chars_available_hook? */ diff --git a/lib/readline/kill.c b/lib/readline/kill.c index 1dfe3c57..1f13e447 100644 --- a/lib/readline/kill.c +++ b/lib/readline/kill.c @@ -779,7 +779,7 @@ _rl_read_bracketed_paste_prefix (int c) pbpref = BRACK_PASTE_PREF; /* XXX - debugging */ if (c != pbpref[0]) return (0); - pbuf[ind = 0] = c; + pbuf[ind = 0] = key = c; while (ind < BRACK_PASTE_SLEN-1 && (RL_ISSTATE (RL_STATE_INPUTPENDING|RL_STATE_MACROINPUT) == 0) && _rl_pushed_input_available () == 0 && diff --git a/subst.c b/subst.c index 08d9285e..e69e0e5b 100644 --- a/subst.c +++ b/subst.c @@ -7021,7 +7021,6 @@ function_substitute (char *string, int quoted, int flags) /* We call anonclose as part of the outer nofork unwind-protects */ BLOCK_SIGNAL (SIGINT, set, oset); lseek (afd, 0, SEEK_SET); - tflag = 0; istring = read_comsub (afd, quoted, flags, &tflag); UNBLOCK_SIGNAL (oset); } @@ -7029,6 +7028,7 @@ function_substitute (char *string, int quoted, int flags) { s = get_string_value ("REPLY"); istring = s ? comsub_quote_string (s, quoted, flags) : savestring (""); + tflag = 0; } run_unwind_frame ("nofork comsub"); /* restores stdout, job control stuff */ diff --git a/builtins/read.def b/builtins/read.def index cb4e1e59..80d1241d 100644 --- a/builtins/read.def +++ b/builtins/read.def @@ -403,6 +403,9 @@ read_builtin (WORD_LIST *list) input_string = (char *)xmalloc (size = 112); /* XXX was 128 */ input_string[0] = '\0'; + pass_next = 0; /* Non-zero signifies last char was backslash. */ + saw_escape = 0; /* Non-zero signifies that we saw an escape char */ + /* More input and options validation */ if (nflag == 1 && nchars == 0) { @@ -463,9 +466,6 @@ read_builtin (WORD_LIST *list) add_unwind_protect (xfree, rlbuf); #endif - pass_next = 0; /* Non-zero signifies last char was backslash. */ - saw_escape = 0; /* Non-zero signifies that we saw an escape char */ - if (tmsec > 0 || tmusec > 0) { /* Turn off the timeout if stdin is a regular file (e.g. from
[PATCH] null pointer deref in bindpwd
Only triggered by doing something stupid: bash -c 'declare -n OLDPWD=X[SHLVL=-1]; /; cd /' bash: line 1: X[SHLVL=-1]: bad array subscript Segmentation fault: 11 --- diff --git a/builtins/cd.def b/builtins/cd.def index de123f8b..e3156463 100644 --- a/builtins/cd.def +++ b/builtins/cd.def @@ -158,10 +158,9 @@ bindpwd (int no_symlinks) pwdvar = get_string_value ("PWD"); tvar = bind_variable ("OLDPWD", pwdvar, 0); - if (tvar && readonly_p (tvar)) + if (tvar == 0 || readonly_p (tvar)) r = EXECUTION_FAILURE; - - if (old_anm == 0 && array_needs_making && exported_p (tvar)) + else if (old_anm == 0 && array_needs_making && exported_p (tvar)) { update_export_env_inplace ("OLDPWD=", 7, pwdvar); array_needs_making = 0;
Various small leaks
Found mostly by normal usage running a no-bash-malloc build with clang's LeakSanitizer enabled. So far seems to provide very accurate results with little overhead. * arrayfunc.c - quote_compound_array_word: make sure to free VALUE - bind_assoc_var_internal: if assigning to a dynamic variable, make sure to free the key (usually assoc_insert would do it) * bashline.c - bash_command_name_stat_hook: free original *NAME if we are going to change what it points to (what the callers seem to expect) * builtins/evalstring.c - parse_and_execute: make sure to dispose of the parsed command resulting from a failed function import attempt - open_redir_file: if we did not get a pointer to pass back the expanded filename, make sure to free the name * examples/loadables/kv.c - kvsplit: bind_assoc_variable does not free its VALUE argument so do not pass a copy in the call calling * examples/loadables/stat.c - loadstat: bind_assoc_variable does not free its VALUE argument so make sure to do it * subst.c - param_expand: free temp1 value for codepaths that don't do it --- arrayfunc.c | 6 +- bashline.c| 1 + builtins/evalstring.c | 4 examples/loadables/kv.c | 2 +- examples/loadables/stat.c | 1 + subst.c | 2 ++ 6 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arrayfunc.c b/arrayfunc.c index 28aee54b..ea7a559d 100644 --- a/arrayfunc.c +++ b/arrayfunc.c @@ -193,7 +193,10 @@ bind_assoc_var_internal (SHELL_VAR *entry, HASH_TABLE *hash, char *key, const ch newval = make_array_variable_value (entry, 0, key, value, flags); if (entry->assign_func) -(*entry->assign_func) (entry, newval, 0, key); +{ + (*entry->assign_func) (entry, newval, 0, key); + FREE (key); +} else assoc_insert (hash, key, newval); @@ -945,6 +948,7 @@ quote_compound_array_word (char *w, int type) if (t != w+ind) free (t); strcpy (nword + i, value); + free (value); return nword; } diff --git a/bashline.c b/bashline.c index 6adc548e..c14d465f 100644 --- a/bashline.c +++ b/bashline.c @@ -1923,6 +1923,7 @@ bash_command_name_stat_hook (char **name) result = search_for_command (cname, 0); if (result) { + FREE (*name); *name = result; return 1; } diff --git a/builtins/evalstring.c b/builtins/evalstring.c index 11785de1..f4f62b2d 100644 --- a/builtins/evalstring.c +++ b/builtins/evalstring.c @@ -468,6 +468,8 @@ parse_and_execute (char *string, const char *from_file, int flags) should_jump_to_top_level = 0; last_result = last_command_exit_value = EX_BADUSAGE; set_pipestatus_from_exit (last_command_exit_value); + dispose_command(command); + global_command = (COMMAND *)NULL; reset_parser (); break; } @@ -766,6 +768,8 @@ open_redir_file (REDIRECT *r, char **fnp) if (fnp) *fnp = fn; + else +free (fn); return fd; } diff --git a/examples/loadables/kv.c b/examples/loadables/kv.c index 1dfceb6a..ee6dc7f0 100644 --- a/examples/loadables/kv.c +++ b/examples/loadables/kv.c @@ -71,7 +71,7 @@ kvsplit (SHELL_VAR *v, char *line, char *dstring) else value = ""; - return (bind_assoc_variable (v, name_cell (v), savestring (key), savestring (value), 0) != 0); + return (bind_assoc_variable (v, name_cell (v), savestring (key), value, 0) != 0); } int diff --git a/examples/loadables/stat.c b/examples/loadables/stat.c index 1093f7b0..d58f07e5 100644 --- a/examples/loadables/stat.c +++ b/examples/loadables/stat.c @@ -330,6 +330,7 @@ loadstat (char *vname, SHELL_VAR *var, char *fname, int flags, char *fmt, struct key = savestring (arraysubs[i]); value = statval (i, fname, flags, fmt, sp); v = bind_assoc_variable (var, vname, key, value, ASS_FORCE); + free (value); } return 0; } diff --git a/subst.c b/subst.c index b93374b8..0ecb4cf1 100644 --- a/subst.c +++ b/subst.c @@ -10850,6 +10850,7 @@ comsub: { chk_atstar (temp, quoted, pflags, quoted_dollar_at_p, contains_dollar_at); tdesc = parameter_brace_expand_word (temp, SPECIAL_VAR (temp, 0), quoted, pflags, 0); + free (temp1); if (tdesc == &expand_wdesc_error || tdesc == &expand_wdesc_fatal) return (tdesc); ret = tdesc; @@ -10862,6 +10863,7 @@ comsub: { set_exit_status (EXECUTION_FAILURE); report_error (_("%s: invalid variable name for name reference"), temp); + free (temp1); return (&expand_wdesc_error); /* XXX */ } else From 892cb679195298496a8fc86c36825b20b0a8e07c Mon Sep 17 00:00:00 2001 From: Grisha Levit Date: Sat, 3 Jun 2023 16:51:26 -0400 Subject: [PATCH] various leaks Found mostly by normal usage running a no-bash-malloc build with clang's LeakSanitizer enabled. So far seems to provide very accurate results. * arrayfunc.c - quote_compound_array_word: make sure to free VALUE - bind_asso
[PATCH] completion display interrupt leak handling
When readline's display of completion matches is interrupted, the match list is only freed for the '?' completion type but not for '!' and '@' types. I couldn't find a specific reason for that to be the case so hope this patch is an OK fix. Also add freeing saved_line_buffer from rl_complete_internal to the signal handler and freeing of matches in one spot where menu completion doesn't do it. * lib/readline/complete.c - complete_sigcleanarg_t: new struct to hold _rl_complete_sigcleanup argument - _rl_complete_sigcleanup: use new struct instead of *matches pointer, now also handles freeing saved_line - rl_complete_internal: set up _rl_sigcleanup whenever display_matches is called so we no longer leak matches and saved_line_buffer when getting a SIGINT - rl_menu_complete: free contents of matches instead of just pointer when completion-query-items exceeded --- lib/readline/complete.c | 61 ++--- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/lib/readline/complete.c b/lib/readline/complete.c index bf7cc856..f8f08aa6 100644 --- a/lib/readline/complete.c +++ b/lib/readline/complete.c @@ -494,12 +494,19 @@ _rl_reset_completion_state (void) rl_completion_quote_character = 0; } +typedef struct { +char **matches; +char *saved_line; +} complete_sigcleanarg_t; + static void _rl_complete_sigcleanup (int sig, void *ptr) { + complete_sigcleanarg_t *arg = ptr; if (sig == SIGINT) /* XXX - for now */ { - _rl_free_match_list ((char **)ptr); + _rl_free_match_list (arg->matches); + FREE (arg->saved_line); _rl_complete_display_matches_interrupt = 1; } } @@ -2008,7 +2015,8 @@ rl_complete_internal (int what_to_do) int start, end, delimiter, found_quote, i, nontrivial_lcd; char *text, *saved_line_buffer; char quote_char; - int tlen, mlen, saved_last_completion_failed; + int tlen, mlen, saved_last_completion_failed, do_display; + complete_sigcleanarg_t cleanarg; RL_SETSTATE(RL_STATE_COMPLETING); @@ -2088,6 +2096,8 @@ rl_complete_internal (int what_to_do) if (matches && matches[0] && *matches[0]) last_completion_failed = 0; + do_display = 0; + switch (what_to_do) { case TAB: @@ -2121,13 +2131,13 @@ rl_complete_internal (int what_to_do) { if (what_to_do == '!') { - display_matches (matches); + do_display = 1; break; } else if (what_to_do == '@') { if (nontrivial_lcd == 0) - display_matches (matches); + do_display = 1; break; } else if (rl_editing_mode != vi_mode) @@ -2151,23 +2161,7 @@ rl_complete_internal (int what_to_do) append_to_match (matches[0], delimiter, quote_char, nontrivial_lcd); break; } - - if (rl_completion_display_matches_hook == 0) - { - _rl_sigcleanup = _rl_complete_sigcleanup; - _rl_sigcleanarg = matches; - _rl_complete_display_matches_interrupt = 0; - } - display_matches (matches); - if (_rl_complete_display_matches_interrupt) -{ - matches = 0; /* already freed by rl_complete_sigcleanup */ - _rl_complete_display_matches_interrupt = 0; - if (rl_signal_event_hook) - (*rl_signal_event_hook) (); /* XXX */ -} - _rl_sigcleanup = 0; - _rl_sigcleanarg = 0; + do_display = 1; break; default: @@ -2180,6 +2174,29 @@ rl_complete_internal (int what_to_do) return 1; } + if (do_display) +{ + if (rl_completion_display_matches_hook == 0) + { + _rl_sigcleanup = _rl_complete_sigcleanup; + cleanarg.matches = matches; + cleanarg.saved_line = saved_line_buffer; + _rl_sigcleanarg = &cleanarg; + _rl_complete_display_matches_interrupt = 0; + } + display_matches (matches); + if (_rl_complete_display_matches_interrupt) + { + matches = 0; /* already freed by rl_complete_sigcleanup */ + saved_line_buffer = 0; + _rl_complete_display_matches_interrupt = 0; + if (rl_signal_event_hook) + (*rl_signal_event_hook) (); /* XXX */ + } + _rl_sigcleanup = 0; + _rl_sigcleanarg = 0; +} + _rl_free_match_list (matches); /* Check to see if the line has changed through all of this manipulation. */ @@ -2864,7 +2881,7 @@ rl_menu_complete (int count, int ignore) if (rl_completion_query_items > 0 && match_list_size >= rl_completion_query_items) { rl_ding (); - FREE (matches); + _rl_free_match_list (matches); matches = (char **)0; full_completion = 1; return (0); From 8eef36cd74caae425e536ead84b1a8cb1cad44b7 Mon Sep 17 00:00:00 2001 From: Grisha Levit Date: Sat, 3 Jun 2023 23:31:16 -0400 Subject: [PATCH] completion display interrupt leak handling When readline's display of completion matches is interrupted, the match list is only freed for the '?' completion type but not for '!&
Re: No form commsub if last token ends with & or ;
On Sunday, May 28, 2023, Grisha Levit wrote: > Missing final `;': > > "$BASH" --pretty-print <<< $'${ : \;;}' > ${ : \; } > The latest set of fixes to this code solves these cases but others have issues: $ bash --pretty-print <<<$'${ : \;;\n}' ${ : \; } $ bash --pretty-print <<<$'${ : \;\n\n}' ${ : \; } $ bash --pretty-print <<<$'${ : \&;\n}' ${ : \& } $ bash --pretty-print <<<$'${ : \&;\n\n}' ${ : \& } I think it might be ok to just have print_comsub handle adding the final semicolon when needed instead of trying to add it after the fact in parse_comsub? (Patch below includes earlier fixes to semicolon() from https://lists.gnu.org/archive/html/bug-bash/2023-06/msg00018.html) --- diff --git a/externs.h b/externs.h index a1363d4d..b1eb53dc 100644 --- a/externs.h +++ b/externs.h @@ -36,7 +36,7 @@ extern intmax_t evalexp (const char *, int, int *); #define FUNC_EXTERNAL 0x02 extern char *make_command_string (COMMAND *); -extern char *print_comsub (COMMAND *); +extern char *print_comsub (COMMAND *, int); extern char *named_function_string (char *, COMMAND *, int); extern void print_command (COMMAND *); diff --git a/parse.y b/parse.y index c139a4d7..150bba65 100644 --- a/parse.y +++ b/parse.y @@ -4297,7 +4297,7 @@ static char * parse_comsub (int qc, int open, int close, size_t *lenp, int flags) { int peekc, r; - int start_lineno, dolbrace_spec, local_extglob, was_extpat, was_word; + int start_lineno, dolbrace_spec, local_extglob, was_extpat; char *ret, *tcmd; size_t retlen; sh_parser_state_t ps; @@ -4342,7 +4342,6 @@ parse_comsub (int qc, int open, int close, size_t *lenp, int flags) save_parser_state (&ps); was_extpat = (parser_state & PST_EXTPAT); - was_word = 0; /* State flags we don't want to persist into command substitutions. */ parser_state &= ~(PST_REGEXP|PST_EXTPAT|PST_CONDCMD|PST_CONDEXPR|PST_COMPASSIGN); @@ -4388,14 +4387,6 @@ parse_comsub (int qc, int open, int close, size_t *lenp, int flags) r = yyparse (); - if (open == '{') -{ - if (current_token == shell_eof_token && - (last_read_token == ';' || last_read_token == '\n') && - (token_before_that == WORD || token_before_that == ASSIGNMENT_WORD)) - was_word = 1; -} - if (need_here_doc > 0) { internal_warning ("command substitution: %d unterminated here-document%s", need_here_doc, (need_here_doc == 1) ? "" : "s"); @@ -4459,7 +4450,7 @@ INTERNAL_DEBUG(("current_token (%d) != shell_eof_token (%c)", current_token, she restore_parser_state (&ps); pushed_string_list = saved_strings; - tcmd = print_comsub (parsed_command);/* returns static memory */ + tcmd = print_comsub (parsed_command, open); /* returns static memory */ retlen = strlen (tcmd); if (open == '(') /* ) */ { @@ -4481,17 +4472,10 @@ INTERNAL_DEBUG(("current_token (%d) != shell_eof_token (%c)", current_token, she } else /* open == '{' } */ { - int lastc; - - lastc = tcmd[retlen - 1]; retlen++; - ret = xmalloc (retlen + 4); + ret = xmalloc (retlen + 3); ret[0] = (dolbrace_spec == '|') ? '|' : ' '; - strcpy (ret + 1, tcmd); /* ( */ - if (was_word) - ret[retlen++] = ';'; - else if (lastc != '\n' && lastc != ';' && lastc != '&') - ret[retlen++] = ';'; + strcpy (ret + 1, tcmd); ret[retlen++] = ' '; } ret[retlen++] = close; diff --git a/print_cmd.c b/print_cmd.c index 29870837..0a403c6f 100644 --- a/print_cmd.c +++ b/print_cmd.c @@ -161,12 +161,14 @@ make_command_string (COMMAND *command) back into an external representation without turning newlines into `;'. Placeholder for other changes, if any are necessary. */ char * -print_comsub (COMMAND *command) +print_comsub (COMMAND *command, int open) { char *ret; printing_comsub++; ret = make_command_string (command); + if (open == '{') +semicolon(); printing_comsub--; return ret; } @@ -1446,9 +1448,11 @@ indent (int amount) static void semicolon (void) { - if (command_string_index > 0 && - (the_printed_command[command_string_index - 1] == '&' || - the_printed_command[command_string_index - 1] == '\n')) + if ((command_string_index > 1 && + (the_printed_command[command_string_index - 2] == ' ' && +the_printed_command[command_string_index - 1] == '&')) || + (command_string_index > 0 && + (the_printed_command[command_string_index - 1] == '\n'))) return; cprintf (";"); }
Re: Various small leaks
On Wed, Jun 21, 2023, 10:08 Chet Ramey wrote: > > On 6/17/23 4:02 AM, Grisha Levit wrote: > > Found mostly by normal usage running a no-bash-malloc build with clang's > > LeakSanitizer enabled. > > Are you running this on macOS or some other system? I actually couldn't figure out how to get useful results from LeakSanitizer on macOS. It's disabled in Apple's build of clang and though it's enabled in homebrew's it seemed to be producing bogus results with unusable traces. So I was testing on Linux, configured with: ~/src/bash/configure -C CC=clang CFLAGS='-g -O0 -fsanitize=leak -fno-common -fno-omit-frame-pointer -fno-optimize-sibling-calls -Wno-parentheses -Wno-format-security' --without-bash-malloc This produces rather useful reports like: $ LSAN_OPTIONS=detect-leaks=1:strip_path_prefix=$HOME/src/bash/ ./bash -c 'BASH_ALIASES[foo]=bar' = ==58001==ERROR: LeakSanitizer: detected memory leaks Direct leak of 4 byte(s) in 1 object(s) allocated from: #0 0xd6573fa4 in malloc (/home/parallels/bld/bash-lsan/bash+0x63fa4) (BuildId: f61b4a39b5a8121d7dce584eee376250b9b2a92a) #1 0xd66333dc in xmalloc xmalloc.c:107:10 #2 0xd65d3e54 in string_list_internal subst.c:2798:13 #3 0xd65d40ec in string_list subst.c:2840:11 #4 0xd65e298c in expand_subscript_string subst.c:10935:13 #5 0xd660dff4 in assign_array_element_internal arrayfunc.c:386:9 #6 0xd660dc60 in assign_array_element arrayfunc.c:353:11 #7 0xd65d74a8 in do_assignment_internal subst.c:3588:15 #8 0xd65d7810 in do_word_assignment subst.c:3646:10 #9 0xd65f7608 in do_assignment_statements subst.c:12971:14 #10 0xd65e335c in expand_word_list_internal subst.c:13049:6 #11 0xd65e3290 in expand_words subst.c:12372:11 #12 0xd659f210 in execute_simple_command execute_cmd.c:4501:15 #13 0xd659caa8 in execute_command_internal execute_cmd.c:889:4 #14 0xd663f74c in parse_and_execute builtins/evalstring.c:549:17 #15 0xd657a308 in run_one_command shell.c:1452:12 #16 0xd6577e80 in main shell.c:757:7 #17 0xbdd773f8 in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16 #18 0xbdd774c8 in __libc_start_main csu/../csu/libc-start.c:392:3 #19 0xd654866c in _start (/home/parallels/bld/bash-lsan/bash+0x3866c) (BuildId: f61b4a39b5a8121d7dce584eee376250b9b2a92a) SUMMARY: LeakSanitizer: 4 byte(s) leaked in 1 allocation(s).
Re: Various small leaks
On Wed, Jun 21, 2023 at 3:09 PM Chet Ramey wrote: > Have you considered running `make tests' on a bash binary built with this > configuration? I'd be interested in those results. I have, but unfortunately this generates quite a few reports that are a little tricky to chase down. The main difficulty is that lsan looks for unreachable blocks (only) when the process exits so we only know that there's a leak somewhere in e.g. the alias tests or the arith tests but not at which line. There's also a lot of duplication because once a leak has occurred, lsan will end up generating the same report for every subsequently invoked subshell. The latter fact makes it easy to see leaks in an interactive shell though, by putting '$(:)' in PS1 for example. Anyway, most of the leaks reported from running the test suite seem to stem from longjmp's due to expansion/arithmetic/syntax errors, like: $ ./bash -c 'let x=' bash: line 1: let: x=: arithmetic syntax error: operand expected (error token is "=") Direct leak of 2 byte(s) in 1 object(s) allocated from: #1 0xdf1604ac in xmalloc xmalloc.c:107:10 #2 0xdf0ebb64 in expassign expr.c:528:13 #3 0xdf0eacd8 in expcomma expr.c:487:11 #4 0xdf0e9d3c in subexpr expr.c:468:9 #5 0xdf0e9a2c in evalexp expr.c:434:9 #6 0xdf174dac in let_builtin builtins/let.def:102:13 $ ./bash -c 'eval ": a=()"' bash: eval: line 1: syntax error near unexpected token `(' bash: eval: line 1: `: a=()' Direct leak of 32 byte(s) in 1 object(s) allocated from: #1 0xe69604ac in xmalloc xmalloc.c:107:10 #2 0xe68c464c in make_bare_simple_command make_cmd.c:457:24 #3 0xe68c46f0 in make_simple_command make_cmd.c:482:17 #4 0xe68ac320 in yyparse parse.y:813:45 #5 0xe68aaea8 in parse_command eval.c:354:7 #6 0xe696c3a4 in parse_and_execute builtins/evalstring.c:440:11 #7 0xe696d4a0 in evalstring builtins/evalstring.c:831:9 #8 0xe696a638 in eval_builtin builtins/eval.def:55:18 Though here's one that's not error-related: $ ./bash -O globstar -c 'echo **/foo*' **/foo* Direct leak of 8 byte(s) in 1 object(s) allocated from: #1 0xc3320ca8 in glob_vector lib/glob/glob.c:958:31 #2 0xc3321ff8 in glob_filename lib/glob/glob.c:1328:21 #3 0xc32c0038 in shell_glob_filename pathexp.c:416:13 #4 0xc32b5448 in glob_expand_word_list subst.c:12422:17 #5 0xc32a0ac0 in expand_word_list_internal subst.c:13081:13 #6 0xc32a0914 in expand_words subst.c:12374:11 #7 0xc325dc60 in execute_simple_command execute_cmd.c:4501:15 There's quite a few others but they mostly seem harmless, things like the case command not being freed in `case x in *) exit;; esac'.
uninitialized variable access in read_builtin
`read' can hit its timeout before it gets a chance to save the current signal mask so sigprocmask can end up restoring an uninitialized prevset. (Also all the sigprocmask calls other than the one in the jmp target are protected by `#if defined (SIGCHLD)' so I guess this one should be too) Found by running the test suite on a build with clang's MemorySanitizer enabled. There were only two other reports, both from quite recent additions, so I'll just mention them here: * anonopen doesn't set *fn if memfd_create is used so uw_anonclose frees an uninitialized pointer value later * convert_validarray_flags_to_arrayval_flags doesn't initialize avflags --- diff --git a/builtins/read.def b/builtins/read.def index cb4e1e59..ecfb3d4a 100644 --- a/builtins/read.def +++ b/builtins/read.def @@ -428,6 +428,7 @@ read_builtin (WORD_LIST *list) sigemptyset (&chldset); sigprocmask (SIG_BLOCK, (sigset_t *)0, &chldset); sigaddset (&chldset, SIGCHLD); + sigprocmask (SIG_SETMASK, (sigset_t *)0, &prevset); #endif begin_unwind_frame ("read_builtin"); @@ -495,7 +496,9 @@ read_builtin (WORD_LIST *list) if (code) { reset_timeout (); +#if defined (SIGCHLD) sigprocmask (SIG_SETMASK, &prevset, (sigset_t *)0); +#endif /* Tricky. The top of the unwind-protect stack is the free of input_string. We want to run all the rest and use input_string,
temp env vs export
Using `export' / `readonly' on a variable that's present in both the temp env and a calling function's local context combines the attributes of all the intervening scopes in the global variable: $ declare -A v; f() { local -a v; v= e; }; e() { export v; } $ (f; declare -p v) declare -aAx v=([0]="") $ (f; v=) Segmentation fault: 11
Re: EOF at PS2
On Fri, May 26, 2023, 17:44 Chet Ramey wrote: > > On 4/26/23 5:38 PM, Grisha Levit wrote: > > A few issues with EOF being received at PS2: > > I finally had a chance to check out the ksh88 behavior on a Solaris 10 VM. > It's pretty bizarre, but it does point to some improvements when ignoreeof > is enabled. It seems like the POSIX committee wanted the ksh88 behavior > without the weird aspects. I didn't check ksh93. > > I think we can have bash honor ignoreeof and not exit after delimiting the > token or command while not acting quite as bizarrely as ksh88. I think the new behavior is quite reasonable. Though it does need something to allow the exit builtin to work even with ignoreeof, like --- diff --git a/eval.c b/eval.c index 8337a275..f91a09fe 100644 --- a/eval.c +++ b/eval.c @@ -185,7 +185,7 @@ reader_loop (void) } if (EOF_Reached && interactive && ignoreeof && parse_and_execute_level == 0) { - if (handle_ignoreeof (1)) + if (code != EXITBLTIN && handle_ignoreeof (1)) EOF_Reached = 0; } }
xtrace ansi quoting vs shell metas
When printing the command for xtrace, if a word or assignment rhs contains a shell meta character, ansi quoting is not applied even if it otherwise should be. $ (set -x; : $'_\1' $'*\1') |& cat -v + : $'_\001' '*^A' It seems that the order of the sh_contains_shell_metas and ansic_shouldquote tests should be reversed in the xtrace_print_* functions (which would match their usage in the rest of the code) --- diff --git a/print_cmd.c b/print_cmd.c index f8524e04..d7c5bbf9 100644 --- a/print_cmd.c +++ b/print_cmd.c @@ -514,10 +514,10 @@ xtrace_print_assignment (char *name, char *value, int assign_list, int xflags) /* VALUE should not be NULL when this is called. */ if (*value == '\0' || assign_list) nval = value; - else if (sh_contains_shell_metas (value)) -nval = sh_single_quote (value); else if (ansic_shouldquote (value)) nval = ansic_quote (value, 0, (int *)0); + else if (sh_contains_shell_metas (value)) +nval = sh_single_quote (value); else nval = value; @@ -554,15 +554,15 @@ xtrace_print_word_list (WORD_LIST *list, int xtflags) fprintf (xtrace_fp, "''%s", w->next ? " " : ""); else if (xtflags & 2) fprintf (xtrace_fp, "%s%s", t, w->next ? " " : ""); - else if (sh_contains_shell_metas (t)) + else if (ansic_shouldquote (t)) { - x = sh_single_quote (t); + x = ansic_quote (t, 0, (int *)0); fprintf (xtrace_fp, "%s%s", x, w->next ? " " : ""); free (x); } - else if (ansic_shouldquote (t)) + else if (sh_contains_shell_metas (t)) { - x = ansic_quote (t, 0, (int *)0); + x = sh_single_quote (t); fprintf (xtrace_fp, "%s%s", x, w->next ? " " : ""); free (x); }
leak in skip_double_quoted for funsub
diff --git a/subst.c b/subst.c index 215e3469..63ca3370 100644 --- a/subst.c +++ b/subst.c @@ -1042,7 +1042,7 @@ skip_double_quoted (const char *string, size_t slen, int sind, int flags) if (string[i + 1] == LPAREN) ret = extract_command_subst (string, &si, SX_NOALLOC|(flags&SX_COMPLETE)); else if (string[i + 1] == LBRACE && FUNSUB_CHAR (string[si])) - ret = extract_function_subst (string, &si, Q_DOUBLE_QUOTES, (flags & SX_COMPLETE)); + ret = extract_function_subst (string, &si, Q_DOUBLE_QUOTES, SX_NOALLOC|(flags&SX_COMPLETE)); else ret = extract_dollar_brace_string (string, &si, Q_DOUBLE_QUOTES, SX_NOALLOC|(flags&SX_COMPLETE));
[PATCH] unwind protect for bind -x commands
If SIGINT is received during the execution of a bind -x command, the memory allocated for the saved parser state is leaked and the READLINE_* variables remain in the environment * pcomplete.c,bashline.c: - uw_restore_parser_state,uw_rl_set_signals: move from pcomplete.c to bashline.c * bashline.h: - extern declarations for uw_restore_parser_state, uw_rl_set_signals * bashline.c: - unbind_bindx_vars,uw_unbind_bindx_vars: new function to unbind shell variables set by bind -x commands - bash_execute_unix_command: use unbind_bindx_vars, add unwind-protects for rl_set_signals, restore_parser_state, unbind_bindx_vars The existing use of uw_restore_parser_state in gen_shell_function_matches creates a 'restrict' copy of the pointer to the saved parser state. I don't really understand why that is, so I didn't do that in bash_execute_unix_command, but wanted to note here just in case. --- bashline.c | 40 ++-- bashline.h | 3 +++ pcomplete.c | 12 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/bashline.c b/bashline.c index 02f36e3a..70d6778f 100644 --- a/bashline.c +++ b/bashline.c @@ -4428,6 +4428,34 @@ readline_set_char_offset (int ind, int *varp) } } +void +uw_restore_parser_state (void *ps) +{ + restore_parser_state (ps); +} + +void +uw_rl_set_signals (void *ignore) +{ + rl_set_signals (); +} + +static void +unbind_bindx_vars (void) +{ + check_unbind_variable ("READLINE_LINE"); + check_unbind_variable ("READLINE_POINT"); + check_unbind_variable ("READLINE_MARK"); + check_unbind_variable ("READLINE_ARGUMENT"); + array_needs_making = 1; +} + +static void +uw_unbind_bindx_vars (void *ignore) +{ + unbind_bindx_vars (); +} + int bash_execute_unix_command (int count, int key) { @@ -4507,11 +4535,16 @@ bash_execute_unix_command (int count, int key) } array_needs_making = 1; + begin_unwind_frame ("bindx"); save_parser_state (&ps); + add_unwind_protect (uw_unbind_bindx_vars, NULL); + add_unwind_protect (uw_restore_parser_state, &ps); + add_unwind_protect (uw_rl_set_signals, NULL); rl_clear_signals (); r = parse_and_execute (savestring (cmd), "bash_execute_unix_command", SEVAL_NOHIST); rl_set_signals (); restore_parser_state (&ps); + discard_unwind_frame ("bindx"); v = find_variable ("READLINE_LINE"); maybe_make_readline_line (v ? value_cell (v) : 0); @@ -4524,12 +4557,7 @@ bash_execute_unix_command (int count, int key) if (v && legal_number (value_cell (v), &mi)) readline_set_char_offset (mi, &rl_mark); - check_unbind_variable ("READLINE_LINE"); - check_unbind_variable ("READLINE_POINT"); - check_unbind_variable ("READLINE_MARK"); - check_unbind_variable ("READLINE_ARGUMENT"); - array_needs_making = 1; - + unbind_bindx_vars (); /* and restore the readline buffer and display after command execution. */ /* If we clear the last line of the prompt above, redraw only that last line. If the command returns 124, we redraw unconditionally as in diff --git a/bashline.h b/bashline.h index d9fb7379..2079799a 100644 --- a/bashline.h +++ b/bashline.h @@ -56,6 +56,9 @@ extern char **bash_default_completion (const char *, int, int, int, int); extern void set_directory_hook (void); /* Used by programmable completion code. */ +extern void uw_rl_set_signals (void *); +extern void uw_restore_parser_state (void *); + extern char *command_word_completion_function (const char *, int); extern char *bash_groupname_completion_function (const char *, int); extern char *bash_servicename_completion_function (const char *, int); diff --git a/pcomplete.c b/pcomplete.c index 410a7b7d..717a1479 100644 --- a/pcomplete.c +++ b/pcomplete.c @@ -1030,18 +1030,6 @@ build_arg_list (const char *cmd, const char *cname, const char *text, WORD_LIST return ret; } -static void -uw_restore_parser_state (void *ps) -{ - restore_parser_state (ps); -} - -static void -uw_rl_set_signals (void *ignore) -{ - rl_set_signals (); -} - /* Build a command string with $0 == cs->funcname (function to execute for completion list) $1 == command name (command being completed) -- 2.41.0 0001-unwind-protect-for-bind-x-commands.patch Description: Binary data
[PATCH] printing multiple heredocs in list
If there are multiple commands in a row that each require printing the connector prior to the heredoc body, the connector ends up in the wrong place for commands after the first: fun() { catsecond); - PRINT_DEFERRED_HEREDOCS (""); + if (was_heredoc) + PRINT_DEFERRED_HEREDOCS (""); printing_connection--; break; -- 2.41.0
[PATCH] fix printing command after group with heredoc
If the last redirection list in a group / subshell / substitution has a heredoc and the following connector is a semicolon, the connector is incorrectly skipped over. This happens only outside of function definitions, so I think it's an issue only for pretty-print mode. bash --pretty-print <<<$'(:<
[PATCH] sleep builtin signal handling
The sleep builtin currently doesn't do much in the way of signal management, so for example it will return on SIGWINCH, which I think most users will be particularly surprised by the first time they notice a sleep ending early due to a window resize. I looked at adding more signal handling to the current select-based implementation but then realized it would be a lot easier to just use nanosleep(2). Both select() and nanosleep() first appear in 1997's XSH Issue 5 so I don't know if we need to worry much about systems without it. (The patch leaves the select/pselect code as fallback though). --- diff --git a/config.h.in b/config.h.in index c2750a2a..1b6a1853 100644 --- a/config.h.in +++ b/config.h.in @@ -771,6 +771,9 @@ /* Define if you have the mkstemp function. */ #undef HAVE_MKSTEMP +/* Define if you have the nonosleep function. */ +#undef HAVE_NANOSLEEP + /* Define if you have the pathconf function. */ #undef HAVE_PATHCONF diff --git a/configure.ac b/configure.ac index 29f50438..de7d693c 100644 --- a/configure.ac +++ b/configure.ac @@ -850,9 +850,9 @@ AC_REPLACE_FUNCS(rename) dnl checks for c library functions AC_CHECK_FUNCS(bcopy bzero clock_gettime confstr faccessat fnmatch \ getaddrinfo gethostbyname getservbyname getservent inet_aton \ - imaxdiv memmove pathconf putenv raise random regcomp regexec \ - setenv setlinebuf setlocale setvbuf siginterrupt strchr \ - sysconf syslog tcgetattr times ttyname tzset unsetenv) + imaxdiv memmove nanosleep pathconf putenv raise random regcomp \ + regexec setenv setlinebuf setlocale setvbuf siginterrupt \ + strchr sysconf syslog tcgetattr times ttyname tzset unsetenv) AC_CHECK_FUNCS(vasprintf asprintf) AC_CHECK_FUNCS(isascii isblank isgraph isprint isspace isxdigit) diff --git a/lib/sh/ufuncs.c b/lib/sh/ufuncs.c index 247224d6..b3894450 100644 --- a/lib/sh/ufuncs.c +++ b/lib/sh/ufuncs.c @@ -82,7 +82,26 @@ falarm (unsigned int secs, unsigned int usecs) /* A version of sleep using fractional seconds and select. I'd like to use `usleep', but it's already taken */ -#if defined (HAVE_TIMEVAL) && (defined (HAVE_SELECT) || defined (HAVE_PSELECT)) +#if defined (HAVE_NANOSLEEP) +int +fsleep(unsigned int sec, unsigned int usec) +{ + int r; + struct timespec req, rem; + + req.tv_sec = sec; + req.tv_nsec = usec * 1000; + + for (;;) +{ + QUIT; + r = nanosleep (&req, &rem); + if (r == 0 || errno != EINTR) +return r; + req = rem; +} +} +#elif defined (HAVE_TIMEVAL) && (defined (HAVE_SELECT) || defined (HAVE_PSELECT)) int fsleep(unsigned int sec, unsigned int usec) { From a33289b218028b6d72966d8253646d6f174e09a5 Mon Sep 17 00:00:00 2001 From: Grisha Levit Date: Thu, 29 Jun 2023 21:55:05 -0400 Subject: [PATCH] fsleep: use nanosleep, handle signals --- config.h.in | 3 +++ configure.ac| 6 +++--- lib/sh/ufuncs.c | 21 - 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/config.h.in b/config.h.in index c2750a2a..1b6a1853 100644 --- a/config.h.in +++ b/config.h.in @@ -771,6 +771,9 @@ /* Define if you have the mkstemp function. */ #undef HAVE_MKSTEMP +/* Define if you have the nonosleep function. */ +#undef HAVE_NANOSLEEP + /* Define if you have the pathconf function. */ #undef HAVE_PATHCONF diff --git a/configure.ac b/configure.ac index 29f50438..de7d693c 100644 --- a/configure.ac +++ b/configure.ac @@ -850,9 +850,9 @@ AC_REPLACE_FUNCS(rename) dnl checks for c library functions AC_CHECK_FUNCS(bcopy bzero clock_gettime confstr faccessat fnmatch \ getaddrinfo gethostbyname getservbyname getservent inet_aton \ - imaxdiv memmove pathconf putenv raise random regcomp regexec \ - setenv setlinebuf setlocale setvbuf siginterrupt strchr \ - sysconf syslog tcgetattr times ttyname tzset unsetenv) + imaxdiv memmove nanosleep pathconf putenv raise random regcomp \ + regexec setenv setlinebuf setlocale setvbuf siginterrupt \ + strchr sysconf syslog tcgetattr times ttyname tzset unsetenv) AC_CHECK_FUNCS(vasprintf asprintf) AC_CHECK_FUNCS(isascii isblank isgraph isprint isspace isxdigit) diff --git a/lib/sh/ufuncs.c b/lib/sh/ufuncs.c index 247224d6..b3894450 100644 --- a/lib/sh/ufuncs.c +++ b/lib/sh/ufuncs.c @@ -82,7 +82,26 @@ falarm (unsigned int secs, unsigned int usecs) /* A version of sleep using fractional seconds and select. I'd like to use `usleep', but it's already taken */ -#if defined (HAVE_TIMEVAL) && (defined (HAVE_SELECT) || defined (HAVE_PSELECT)) +#if defined (HAVE_NANOSLEEP) +int +fsleep(unsigned int sec, unsigned int usec) +{ + int r; + struct timespec req, rem; + + req.tv_sec = sec; + req.tv_nsec = usec * 1000; + + for (;;) +{ + QUIT; + r = nanosleep (&req, &rem); + if (r == 0 || errno != EINTR) +return r; + re
Re: [PATCH] printing multiple heredocs in list
On Fri, Jun 30, 2023, 16:02 Chet Ramey wrote: > On 6/29/23 4:01 AM, Grisha Levit wrote: > > If there are multiple commands in a row that each require printing the > > connector prior to the heredoc body, the connector ends up in the wrong > > place for commands after the first: > > OK. The parser builds lists (connections) left-side heavy. What do you > think of replacing the test for was_heredoc in your patch with > (printing_connection == 1), so if we're called recursively to print a > second connection on the left side (Connector->first), we print the here- > document body after our caller has been able to add the connector in > the right place. Oh yeah that seems like a much better solution. That way you shouldn't need to print any deferred > here-documents in make_command_string(). > Great, this was the part that made me think something was wrong with my approach. >
Completion list updates on dynamic builtin load
The `enabled' and `disabled' completion lists don't get updated when a dynamic builtin is loaded: $ compgen -A enabled tty $ enable tty $ compgen -A enabled tty $ diff --git a/builtins/enable.def b/builtins/enable.def index aa143760..399c7fa3 100644 --- a/builtins/enable.def +++ b/builtins/enable.def @@ -193,6 +193,8 @@ enable_builtin (WORD_LIST *list) result = EXECUTION_FAILURE; /* normalize return value */ #if defined (PROGRAMMABLE_COMPLETION) set_itemlist_dirty (&it_builtins); + set_itemlist_dirty (&it_enabled); + set_itemlist_dirty (&it_disabled); #endif } #endif @@ -208,6 +210,8 @@ enable_builtin (WORD_LIST *list) } #if defined (PROGRAMMABLE_COMPLETION) set_itemlist_dirty (&it_builtins); + set_itemlist_dirty (&it_enabled); + set_itemlist_dirty (&it_disabled); #endif } #endif @@ -237,6 +241,8 @@ enable_builtin (WORD_LIST *list) opt = r; #if defined (PROGRAMMABLE_COMPLETION) set_itemlist_dirty (&it_builtins); + set_itemlist_dirty (&it_enabled); + set_itemlist_dirty (&it_disabled); #endif } #endif
Re: [PATCH] allow quoting completions w/o filename treatment
On Fri, Apr 14, 2023, 17:20 Grisha Levit wrote: > > The attached patch hopefully addresses these difficulties by allowing for > the decoupling of filename-specific match handling from match quoting. > Thanks very much for merging this. It adds a new completion option `allquote' (I'm not quite happy with the > name) which controls the corresponding new readline variable > rl_all_quoting_desired. > I noticed you went with "fullquote" for the completion option name and it occurs to me that this might be a bit confusing for users because of the existing "complete_fullquote" shell option. Maybe just "quote" would work? I don't mean to bikeshed this but wanted to mention the potential overlap. >
Re: [PATCH] leak in rl_filename_completion_function
On Wed, May 31, 2023 at 6:23 PM Grisha Levit wrote: > > If rl_filename_rewrite_hook returns a new string for a filename (which > I guess only happens on macOS with bash), it is in most cases not > free-d. > diff --git a/lib/readline/complete.c b/lib/readline/complete.c > index bf7cc856..99556a35 100644 > --- a/lib/readline/complete.c > +++ b/lib/readline/complete.c > @@ -2521,6 +2521,9 @@ rl_filename_completion_function (const char > *text, int state) >convfn = dentry = 0; >while (directory && (entry = readdir (directory))) > { > + if (convfn != dentry) > +xfree (convfn); > + >convfn = dentry = entry->d_name; >convlen = dentlen = D_NAMLEN (entry); > > @@ -2572,6 +2575,9 @@ rl_filename_completion_function (const char > *text, int state) > users_dirname = (char *)NULL; > } > > + if (convfn != dentry) > +xfree (convfn); > + >return (char *)NULL; > } >else I think this one might have gotten lost among the other reports, and looking at it again it occurs to me this might be better solved by just not allocating a new string in the filename rewrite hook since rl_filename_completion_function will copy the string anyway. diff --git a/bashline.c b/bashline.c index 0e5373ab..dcf1ee1f 100644 --- a/bashline.c +++ b/bashline.c @@ -3279,12 +3279,7 @@ bash_directory_expansion (char **dirname) static char * bash_filename_rewrite_hook (char *fname, int fnlen) { - char *conv; - - conv = fnx_fromfs (fname, fnlen); - if (conv != fname) -conv = savestring (conv); - return conv; + return fnx_fromfs (fname, fnlen); } /* Functions to save and restore the appropriate directory hook */ diff --git a/lib/readline/complete.c b/lib/readline/complete.c index d531f541..974bba5e 100644 --- a/lib/readline/complete.c +++ b/lib/readline/complete.c @@ -2637,9 +2637,6 @@ rl_filename_completion_function (const char *text, int state) else temp = savestring (convfn); - if (convfn != dentry) - xfree (convfn); - return (temp); } }
give_terminal_to after re-backgrounded async job
The fix [1] for the issue reported in [2]: + give the terminal to pipeline_pgrp. We don't give the terminal + back to shell_pgrp if an async job exits because we never gave it + to that job in the first place. */ if ((flags & JWAIT_NOTERM) == 0 && running_in_background == 0 && + (job == NO_JOB || IS_ASYNC (job) == 0) && (subshell_environment & (SUBSHELL_ASYNC|SUBSHELL_PIPE)) == 0) give_terminal_to (shell_pgrp, 0); makes bash not take the terminal back when an async job is brought to the foreground with `fg' and then subsequently TSTP-ed. This is not immediately noticeable because if readline is in use, yy_readline_get will shortly do it. However, with --noediting mode the interactive shell will get EOF when reading the next command and exit. And even with readline, any attempt to read from the terminal prior to readline doing it (e.g. from PROMPT_COMMAND) will result in EIO (which is how I first noticed this). [1]: https://git.savannah.gnu.org/cgit/bash.git/commit/?h=devel&id=a37b2af9 [2]: https://lists.gnu.org/archive/html/bug-bash/2023-01/msg00057.html $ bash --norc --noediting bash-5.3$ sleep 60 & [1] 30163 bash-5.3$ fg sleep 60 ^Z # shell exits after this [1]+ Stopped sleep 60 bash-5.3$ exit There are stopped jobs. bash-5.3$ exit $ bash --norc bash-5.3$ r() { read -r -s -dR -t1 -p$'\e[6n'; } bash-5.3$ PROMPT_COMMAND=r bash-5.3$ sleep 60 & [1] 31088 bash-5.3$ fg sleep 60 ^Z [1]+ Stopped sleep 60 bash: read: read error: 0: I/O error bash-5.3$
[PATCH] normalization tweaks for macOS
A few small tweaks for the macOS-specific normalization handling to handle the issues below: Examples below are using the following setup: $ mkdir -p -- $'\303\251-C' $'e\314\201-D' $ /bin/ls -BF é-C/ é-D/ $ LC_ALL=C /bin/ls -BF e\314\201-D/ \303\251-C/ LC_ALL=C just to make the output clear, the shell's locale is UTF-8. This is on APFS, which is normalization-preserving, but similar results show up on HFS+ (which is not). (In either case, access is normalization insensitive). --- When attempting to match a direntry name to the supplied pattern, the globbing code converts the name from UTF-8-MAC encoding to (say) UTF-8. This is essentially an NFC normalization. If the converted name matches, the original name is then (correctly, I think) returned by the glob operation: $ LC_ALL=C printf '%q ' $'\303\251'* $'\303\251-C' $'e\314\201-D' However, the pattern itself does not go through normalization and unless it is already NFC-normalized, it won't match anything. So even names retrieved from globbing might not match themselves: $ for x in *; { echo "$x:" "$x"*/; } é-C: é-C/ é-D: Seems like it would be appropriate to normalize the pattern too. One maybe tricky thing here is that quoted characters in the pattern are supplied to the globbing code prefixed by a backslash, making normalization fail to combine combining characters. It's possible to adjust quote_string_for_globbing to only put in backslashes when the quoted character is special for globbing but that might complicate the code more than necessary -- I kind of cheated by just not adding a backslash if the quoted character is non-ASCII. I can't think of any way a non-ASCII character can be special in globbing code but maybe I'm not trying hard enough. --- Filename completion has a similar situation. The NFC form matches any text that normalizes to it: $ bash -in <<<$'\303\251\e*' bash-5.3$ é-C é-D But NFD text matches nothing, not even itself: $ bash -in <<<$'e\314\201\e*' bash-5.3$ é Admittedly, filename completion does not itself produce non-NFC text so it's less likely that this would be encountered, but normalizing the hint text before comparing it to normalized filenames seems easy enough. BTW glob-expand-word can result in NFD text on the input line but that seems correct since it's what globbing produces. --- If filename completion is invoked through `compgen', it behaves differently in scripts since bashline.c:initialize_readline hasn't had a chance to set rl_filename_rewrite_hook. $ bash -c $'compgen -f -- \303\251' é-C $ bash -c $'compgen -f -- e' é-D This can be worked around by calling `bind' manually, resulting in the same behavior as in an interactive shell: $ bash -c $'bind; compgen -f \303\251' é-C é-D $ bash -c $'bind; compgen -f -- e' $ ..but seems safe enough to set the hook from compgen directly as well. From 6b2c8b478e4a4077b6986ab1fc44a14c739f3459 Mon Sep 17 00:00:00 2001 From: Grisha Levit Date: Wed, 5 Jul 2023 22:15:34 -0400 Subject: [PATCH] fnxform tweaks * lib/glob/glob.c - glob_vector: apply fnx_fromfs to pattern to make sure it can match normalized filenames * pathexp.c - quote_string_for_globbing: don't convert CTLESC to backslash if it was quoting a non-ascii character. cheap solution to keep the bytes of multibyte characters together for fnx_fromfs. * lib/readline/complete.c - rl_filename_completion_function: if rl_filename_rewrite_hook is set, use it to normalize the filename we are matching * bashline.[ch],builtins/complete.def - set_filename_rewrite_hook: new function to set the bash-specific value of rl_filename_rewrite_hook, used by compgen builtin to ensure correct results even if readline has not be initialized --- bashline.c | 6 ++ bashline.h | 1 + builtins/complete.def | 2 ++ lib/glob/glob.c | 11 ++- lib/readline/complete.c | 11 +++ pathexp.c | 5 - 6 files changed, 34 insertions(+), 2 deletions(-) diff --git a/bashline.c b/bashline.c index 0e5373ab..9e2e9260 100644 --- a/bashline.c +++ b/bashline.c @@ -3287,6 +3287,12 @@ bash_filename_rewrite_hook (char *fname, int fnlen) return conv; } +void +set_filename_rewrite_hook (void) +{ + rl_filename_rewrite_hook = bash_filename_rewrite_hook; +} + /* Functions to save and restore the appropriate directory hook */ /* This is not static so the shopt code can call it */ void diff --git a/bashline.h b/bashline.h index d1ef55e8..ce7dc520 100644 --- a/bashline.h +++ b/bashline.h @@ -57,6 +57,7 @@ extern int unbind_unix_command (char *); extern char **bash_default_completion (const char *, int, int, int, int); extern void set_directory_hook (void); +extern void set_filename_rewrite_hook (void); /* Used by programmable completi
leak in command_word_completion_function
If there a glob is expanded to more than one result while attempting to complete the command word, the matches are discarded but not freed. diff --git a/bashline.c b/bashline.c index 0e5373ab..07f38e62 100644 --- a/bashline.c +++ b/bashline.c @@ -2192,7 +2192,11 @@ globword: local_index = 0; if (glob_matches[1] && rl_completion_type == TAB) /* multiple matches are bad */ - return ((char *)NULL); + { + strvec_dispose (glob_matches); + glob_matches = (char **)NULL; + return ((char *)NULL); + } }
[PATCH] print regerror string on regcomp error
Since bash-5.3 now shows an error message when a regular expression can't be compiled, I thought it might be useful to add the regerror()-supplied string that provides more specifics on the failure, so we can get messages like: $ [[ x =~ [z-a] ]] bash: [[: invalid regular expression `[z-a]': invalid character range 0001-print-regerror-string-on-regcomp-error.patch Description: Binary data
[PATCH 1/2] <<# indent-stripping heredoc
This patch implements the ksh93-style <<# redirection operator to enable indentatable heredocs. This (or similar) functionality has been requested a few times, most recently discussed at https://lists.gnu.org/archive/html/bug-bash/2021-09/msg0.html The behavior for heredocs started with the <<# redirection operator is: * Any leading blank characters on the first line of the heredoc define the indentation of the heredoc * Tab characters in indentation are interpreted as indenting to the next 8-column-wide tab stop * All lines of the heredoc are stripped of (up to) as many columns of indentation as found on the first line * The heredoc is terminated when, after any indentation has been removed, the remaining line exactly matches the delimiter There is a small (and not very interesting) edge case: if the heredoc delimiter itself has leading blanks (e.g. <<# " EOF"), then the first line cannot terminate the heredoc (even if it exactly matches the delimiter) because any leading blanks on the first line are treated as defining the heredoc's indentation and so cannot form part of the line. This matches the ksh93 behavior. As pointed out by Lawrence Velázquez in the last discussion, <<# is currently a syntax error except for the case of `shopt -u interactive_comments' in an interactive shell. I don't think it's likely that users are turning off interactive comments and starting heredoc delimiters with `#' but it would be a trivial change to the code to not recognize <<# docs in that circumstance, should we want to go that route. 0001-indent-stripping-heredoc.patch Description: Binary data
[PATCH 2/2] <<# indent-stripping heredoc: indented printing
Enable prettier-printing of <<# heredocs by printing the body indented one level further than surrounding text and the final delimiter at the same indentation level as the surrounding text. 0002-indent-stripping-heredoc-indented-printing.patch Description: Binary data
Re: [PATCH 1/2] <<# indent-stripping heredoc
On Fri, Jul 14, 2023 at 3:44 AM Martin D Kealey wrote: > > On the whole I think this is great, and thankyou for working up the patch, > but I would like to offer some comments and suggestions: Thanks for looking at it, feedback very much appreciated. > One option that some other languages use is to find the terminator, and then > use its indentation as the pattern to remove from the content lines. The > problem of course is that it would take a double run over the content, but > the benefit is that there'd only be one in-band signaling line instead of two. I agree, this would be useful functionality, would be interested in seeing your implementation. > Secondly, the battle for 8-space tabs has been well and truly lost at this > point, so hard-coding that constant feels like it's likely to be a source of > errors. > > In order to be tab-agnostic, I can see two reasonable options: > 1. remove only an exact match for the sequence of whitespace characters that > occurs in the indicator line I think a problem with this is that a coding style using mixed tabs/spaces (such as the bash source..) would have foo EOF saved as: [tab]foo [spc][spc][spc][spc][spc][spc]EOF so I don't think it's feasible to record the indent level as a fixed tab/space mix if we are to allow indentation deeper into the heredoc using the document's native style, at least not without ending up back at the problem of needing to pick a tab stop width to use. Personally, I don't use this indentation style (but don't wish to precipitate an argument everyone is surely tired of) so I don't have much of an opinion on what would be useful here. One slight point in favor of keeping the 8-space wide tab approach is that this is how readline renders literal tabs as well.
printf %ls conversion
The mbsrtowcs call here doesn't convert the final \0 of mbs, leaving the final byte of ws as whatever was just malloc-ed. Noticed in an ASAN build which makes sure that this is never L'\0'. Oddly, this didn't actually trigger an ASAN report, just saw that nothing was getting printed by printf. --- diff --git a/builtins/printf.def b/builtins/printf.def index ad4f4d12..62820514 100644 --- a/builtins/printf.def +++ b/builtins/printf.def @@ -1493,7 +1493,7 @@ getwidestr (size_t *lenp) mbs = garglist->word->word; slen = strlen (mbs); ws = (wchar_t *)xmalloc ((slen + 1) * sizeof (wchar_t)); - mblength = mbsrtowcs (ws, &mbs, slen, &state); + mblength = mbsrtowcs (ws, &mbs, slen + 1, &state); if (lenp) *lenp = mblength;
Re: [PATCH] print regerror string on regcomp error
On Mon, Jul 17, 2023, 12:09 Chet Ramey wrote: > I guess we'll see how much the text of regerror(3) > error messages varies across regexp implementations. > Is that a concern? The GNU strings [1] are translated and even their untranslated text does differ from that of the BSD ones [2]. The set of errors is also different, e.g. REG_EMPTY is only in BSD because its regcomp() does not accept `()' while GNU's does. But it's precisely this difference that I think would help clue users into why their scripts work on one platform and not another. [1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=posix/regcomp.c;h=12650714c06890d99a6a7aaa96437a2f83cc973b;hb=HEAD#l132 [2]: https://cgit.freebsd.org/src/tree/lib/libc/regex/regerror.c?id=8a16b7a18f5d0b031f09832fd7752fba717e2a97#n90
Re: [PATCH] normalization tweaks for macOS
On Mon, Jul 17, 2023 at 3:29 PM Chet Ramey wrote: > > On 7/7/23 5:05 PM, Grisha Levit wrote: > > A few small tweaks for the macOS-specific normalization handling to > > handle the issues below: > > The issue is that the behavior has to be different between cases where > the shell is reading input from the terminal and gets NFC characters > that need to be converted to NFD (which is how HFS+ and APFS store them) > and when the shell is reading input from a file and doesn't need to (and > should not) do anything with NFD characters. NB: while HFS+ stores NFD names, APFS preserves normalization, so we can get either NFC or NFD text back from readdir. Both are normalization-insensitive: "Being normalization-insensitive ensures that normalization variants of a filename cannot be created in the same directory, and that a filename can be found with any of its normalization variants." [1] Currently, Bash never actually converts to NFD. The fnx_tofs() function is there but it is never used. Instead, Bash converts filenames to NFC with fnx_fromfs() before comparing with either the glob pattern or the completion hint text (which is never converted). Since access is normalization-insensitive, we just need to normalize to _some_ form, so going to NFC is fine, but if we're going to do that we should normalize both the filesystem name and the text being compared. If there's a match, globs expand to the filenames (NFC or NFD) as returned by readdir(), and Readline completes with NFC-normalized versions of the names. I think this makes sense. What doesn't work quite right currently though is that glob patterns with NFD text never match anything, and completion prefixes with NFD text never expand to anything. [1]: https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/APFS_Guide/FAQ/FAQ.html > Does iconv work when taking NFD input that came from the file system and > trying to convert it to NFD (UTF-8-MAC)? I've honestly never checked. Converting to UTF-8-MAC always normalizes to NFD: $ printf '\303\251\0\145\314\201' | iconv -f UTF-8-MAC -t UTF-8-MAC | od -b -An 145 314 201 000 145 314 201 $ printf '\303\251\0\145\314\201' | iconv -f UTF-8 -t UTF-8-MAC | od -b -An 145 314 201 000 145 314 201 But Bash only converts from UTF-8-MAC to UTF-8, which always normalizes to NFC: $ printf '\303\251\0\145\314\201' | iconv -f UTF-8-MAC -t UTF-8 | od -b -An 303 251 000 303 251
[PATCH] fix bind -X quoting
The output of `bind -X' is not reusable if the bound command has quotes, backslashes, etc. $ bind -x '"\eX": echo "x"' $ bind -X "\eX": "echo \"x\"" $ bind -x "$(bind -X)" $ bind -X "\eX": "echo \\\"x\\\"" This patch changes rl_macro_dumper to not untranslate the macro body when passed a negative print_readably argument. This is technically an API change, so maybe not the best fix, though I doubt it will impact any real usage. --- bashline.c | 2 +- lib/readline/bind.c | 6 +- lib/readline/doc/rltech.texi | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/bashline.c b/bashline.c index 2fb00e82..e2b329f9 100644 --- a/bashline.c +++ b/bashline.c @@ -4583,7 +4583,7 @@ print_unix_command_map (void) save = rl_get_keymap (); cmd_xmap = get_cmd_xmap_from_keymap (save); rl_set_keymap (cmd_xmap); - rl_macro_dumper (1); + rl_macro_dumper (-1); rl_set_keymap (save); return 0; } diff --git a/lib/readline/bind.c b/lib/readline/bind.c index ee6d6e17..bae0e6cc 100644 --- a/lib/readline/bind.c +++ b/lib/readline/bind.c @@ -2861,7 +2861,11 @@ _rl_macro_dumper_internal (int print_readably, Keymap map, char *prefix) { case ISMACR: keyname = _rl_get_keyname (key); - out = _rl_untranslate_macro_value ((char *)map[key].function, 0); + + if (print_readably < 0) + out = savestring ((char *)map[key].function); + else + out = _rl_untranslate_macro_value ((char *)map[key].function, 0); if (print_readably) fprintf (rl_outstream, "\"%s%s\": \"%s\"\n", prefix ? prefix : "", diff --git a/lib/readline/doc/rltech.texi b/lib/readline/doc/rltech.texi index 4d3f3cca..83a34a5f 100644 --- a/lib/readline/doc/rltech.texi +++ b/lib/readline/doc/rltech.texi @@ -1354,8 +1354,10 @@ use @code{rl_generic_bind()} instead. @deftypefun void rl_macro_dumper (int readable) Print the key sequences bound to macros and their values, using the current keymap, to @code{rl_outstream}. -If @var{readable} is non-zero, the list is formatted in such a way +If @var{readable} is greater than zero, the list is formatted in such a way that it can be made part of an @code{inputrc} file and re-read. +If @var{readable} is less than zero, the macros are printed in "translated" +form. @end deftypefun @deftypefun int rl_variable_bind (const char *variable, const char *value) -- 2.41.0
Re: [PATCH] normalization tweaks for macOS
On Tue, Jul 18, 2023 at 9:55 AM Chet Ramey wrote: > Unicode normalization on macOS has always been a pain in the ass. I can see that! > This is the basic assumption that drives all the decisions: character input > you get from the terminal is in NFC, and files from the file system (names > and usually contents) are in NFD. I guess the point of this patch was that this assumption does not always hold -- neither file system names nor terminal input have any guaranteed normalization. I'll just give one more example, sorry if this is repetitive. In Finder, create two directories: señor and señora. List the parent directory in a Terminal window: $ ls señor señora Type `echo ' and then copy-paste the first filename from `ls' output: $ echo señor Note that tab completion doesn't work. Append a `*' and run the command: $ echo señor* señor* And if you're writing a script you have the same issue: $ bash -O failglob -c 'set -- *; echo "$1"*' bash: line 1: no match: señor* I'm not sure if you're saying that this is correct behavior, or if it's not worth fixing (I'm fine with that), or if there's something wrong with my proposed fix. > > NB: while HFS+ stores NFD names, APFS preserves normalization, so we > > can get either NFC or NFD text back from readdir. > > Well, that doesn't help. But I haven't seen any NFC text coming back from > readdir on any of my macs. > > > Currently, Bash never actually converts to NFD. The fnx_tofs() > > function is there but it is never used. Instead, Bash converts > > filenames to NFC with fnx_fromfs() before comparing with either the > > glob pattern or the completion hint text (which is never converted). > > Correct. It's a one-way conversion, since you only have to convert one > of the two different forms, and the current implementation works on text > entered interactively (which is in NFC). When you're reading a script, you > don't have to perform any conversion at all; your NFD examples all work > fine when run from a script. > > > Since access is normalization-insensitive, we just need to normalize > to > > _some_ form, so going to NFC is fine, but if we're going to do that > > we should normalize both the filesystem name and the text being > > compared. > > The idea is that since the text entered interactively at the terminal is > already in NFC, the curent implementation converts only what it knows is > coming from the keyboard. Did you mean "coming from the filesystem"? There are two places where fnx_fromfs is called: bash_filename_rewrite_hook, and glob_vector. Both of these operate on directory entries, not on keyboard input. > > If there's a match, globs expand to the filenames (NFC or NFD) as > > returned by readdir(), and Readline completes with NFC-normalized > > versions of the names. I think this makes sense. > > Because NFC is what you get from terminal input. > > > What doesn't work quite right currently though is that glob patterns > > with NFD text never match anything, and completion prefixes with NFD > > text never expand to anything. > > When entered from the terminal. It goes back to the basic assumption: NFC > is what you get from the terminal, so you have to convert from the file > system normalization form when you're sure what you want to compare is > coming from the terminal.
Re: [PATCH] fix bind -X quoting
On Wed, Jul 19, 2023 at 10:47 AM Chet Ramey wrote: > Thanks for the report. It seems like your patch is incomplete, though. > After applying it: > > $ bind -x '"\eX": echo "x"' > $ bind -X > "\eX": "echo "x"" > > We probably need to suppress printing the double quotes around `out' if > print_readably < 0. Oh good point. I guess we'd need to also print a backslash if the first character of the command is a quote.
Re: slash appended to tab so its two // at end
On Wed, Jul 19, 2023 at 9:07 AM Chet Ramey wrote: > > On 7/18/23 10:01 PM, alex xmb ratchev wrote: > > i in 5.2.15 bash aarch64 termux did > > > > $ cp -ap db2.*/ > > > > and got > > > > db2.i5// db2.i7// > > I can't reproduce this. This is a combination of either visible-stats or mark-directories being on, filename completion being active, and the word being completed ending with a slash and referring to an existing directory. $ INPUTRC=/dev/null bash --norc bash-5.3$ cd /tmp bash-5.3$ mkdir bin bld bash-5.3$ cd b*/ bin// bld// bash-5.3$ complete -W 'bin/ bld/' -o filenames x bash-5.3$ x b bin// bld// Also I think this is not a bug -- readline is asked to append a slash to directory names and it does so. Same thing happens with e.g. `ls', when asked to append a slash to directory names and the supplied name already has a slash: $ ls -dp /var/ /var//
Re: slash appended to tab so its two // at end
On Wed, Jul 19, 2023 at 12:57 PM Grisha Levit wrote: > Also I think this is not a bug -- readline is asked to append a slash > to directory names and it does so. It's easy enough to avoid printing the indicator slash if we just printed a path already ending in a slash. It's an improvement cosmetically but I think correctness can be argued either way. diff --git a/lib/readline/complete.c b/lib/readline/complete.c index 4b3a4236..d0c6a8eb 100644 --- a/lib/readline/complete.c +++ b/lib/readline/complete.c @@ -1059,6 +1059,10 @@ print_filename (char *to_print, char *full_pathname, int prefix_bytes) } xfree (s); + + if (extension_char == '/' && (s = strrchr (to_print, '/')) && s[1] == 0) +extension_char = 0; + if (extension_char) { putc (extension_char, rl_outstream);
[PATCH] add libintl dep for unwind_prot
(so make -j works) diff --git a/Makefile.in b/Makefile.in index 7f990bfa..ce909bc6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1440,6 +1440,7 @@ sig.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h siglist.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h subst.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h test.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h +unwind_prot.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h trap.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h variables.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h version.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h
Re: problem anomalies , possibly aliases related
On Thu, Jul 20, 2023, 01:42 alex xmb ratchev wrote: > > > 2. it says [[ ! -d then ' continue ' .. where is cp > i call no , not c , ... > 1. cp missing > 2. the [[ ! -d return to continue looks bug wrong > Try putting the code that uses the alias into a function, and then print the function definition. You'll see how it's being expanded. Using Martin's example: $ alias A='B ; C' $ f() { D && A; } $ declare -p -f f f () { D && B; C } Sounds like you want all the commands in the alias to be executed as a group -- so you can just write it as one: alias bad='{ echo fail; continue; }' >
Re: [PATCH] normalization tweaks for macOS
On Thu, Jul 20, 2023 at 11:54 AM Chet Ramey wrote: > So I'll go ahead with your patch, starting with the globbing changes. Thanks! BTW, changing quote_string_for_globbing to skip escaping characters that don't need it makes globbing >2x faster in the case that most of the pattern is a quoted string. This patch only skips over non-ASCII characters (because that's all that was needed for the normalization to work) but I wonder if you'd be interested in changes to the function that would skip escaping ASCII characters that aren't glob-special as well. I _think_ it would just be a slight variation of the code that already does this for regexes but I haven't thought about the edge cases carefully yet. mkdir /tmp/new && cd /tmp/new touch {0001..1000} printf -v str %1000s; export str=${str// /π} export TIMEFORMAT=%3lR bash-devel -c 'time for i in {1..100}; { : "$str"*; }' 0m5.549s bash-devel -c 'time for i in {1..100}; { : $str*; }' 0m1.936s bash-patch -c 'time for i in {1..100}; { : "$str"*; }' 0m1.948s bash-patch -c 'time for i in {1..100}; { : $str*; }' 0m1.926s
Re: [PATCH] normalization tweaks for macOS
On Mon, Jul 24, 2023 at 10:12 AM Chet Ramey wrote: > > On 7/20/23 7:52 PM, Grisha Levit wrote: > > > I wonder if you'd be interested in changes > > to the function that would skip escaping ASCII characters that aren't > > glob-special as well. I _think_ it would just be a slight variation > > of the code that already does this for regexes but I haven't thought > > about the edge cases carefully yet. > > I agree that it would only be a slight variation on that code, and so I > added that case. It will be in the next devel branch push. Re latest changes in [1], we need to preserve quoting also for any of the following characters, at least if they are in a bracket expression: ! - . : = ^ Attached patch does so but is probably more complicated than necessary. Also, there's a mention of changes to complete.def in the changelog but it looks like the actual changes didn't make it into the commit. [1]: https://git.savannah.gnu.org/cgit/bash.git/diff/pathexp.c?h=devel&id=8418224f3 0001-preserve-quoting-for-chars-special-in-bracket-expr.patch Description: Binary data
[PATCH] read: non-raw-mode fixes
This patches addresses a few issues with `read' when not in raw mode. If the last character read was an (unescaped) backslash, store it as such instead of as a CTLESC. Avoids: $ printf '\\' | { read; echo "${REPLY@Q}"; } bash: DEBUG warning: dequote_string: string with bare CTLESC $'\001' If an escaped null byte is read, skip it as we do with an unescaped one, instead of adding it to input_string. Avoids: $ printf 'A\\\0B\nC\n' | while read; do echo "${REPLY@Q}"; done 'A' 'C' $ printf '\\\0' | { read; echo "${REPLY@Q}"; } bash: DEBUG warning: dequote_string: string with bare CTLESC $'\001' # even after fix for first issue If IFS contains \177 and the input consists of only backslash-newline pairs and a sole \177, prevent the bare CTLNUL from being turned into an empty string. Avoids: $ printf '\\\n\177' | { IFS=$'\177' read; echo "${REPLY@Q}"; } '' --- builtins/read.def | 16 +++- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/builtins/read.def b/builtins/read.def index 5b2621fe..84878699 100644 --- a/builtins/read.def +++ b/builtins/read.def @@ -404,7 +404,7 @@ read_builtin (WORD_LIST *list) input_string[0] = '\0'; pass_next = 0; /* Non-zero signifies last char was backslash. */ - saw_escape = 0; /* Non-zero signifies that we saw an escape char */ + saw_escape = 0; /* Index+1 of when we last saw an escape char. */ /* More input and options validation */ if (nflag == 1 && nchars == 0) @@ -751,11 +751,11 @@ read_builtin (WORD_LIST *list) if (pass_next) { pass_next = 0; - if (c == '\n') + if (c == '\n' || c == '\0') { if (skip_ctlesc == 0 && i > 0) i--;/* back up over the CTLESC */ - if (interactive && input_is_tty && raw == 0) + if (interactive && input_is_tty && raw == 0 && c == '\n') print_ps2 = 1; } else @@ -769,8 +769,8 @@ read_builtin (WORD_LIST *list) pass_next++; if (skip_ctlesc == 0) { - saw_escape++; input_string[i++] = CTLESC; + saw_escape=i; } continue; } @@ -783,8 +783,8 @@ read_builtin (WORD_LIST *list) if ((skip_ctlesc == 0 && c == CTLESC) || (skip_ctlnul == 0 && c == CTLNUL)) { - saw_escape++; input_string[i++] = CTLESC; + saw_escape=i; } add_char: @@ -825,6 +825,12 @@ add_char: if (nchars > 0 && nr >= nchars) break; } + + if (i && saw_escape == i && input_string[i-1] == CTLESC) +input_string[i-1] = '\\'; /* Preserve trailing backslash */ + else if (skip_ctlnul && i == 1 & saw_escape == 1 && input_string[0] == CTLNUL) +saw_escape = 0;/* Avoid dequoting bare CTLNUL */ + input_string[i] = '\0'; check_read_timeout (); -- 2.41.0 0001-read-non-raw-mode-fixes.patch Description: Binary data
Re: [PATCH] fix bind -X quoting
On Wed, Jul 26, 2023, 16:06 Chet Ramey wrote: > > On 7/24/23 1:13 PM, Chet Ramey wrote: > > > You could do it if you allowed, say > > > > bind -x '"\eX": \"command with spaces\" \"x\"' > > > > and then stripped the backslashes before calling rl_generic_bind, but > > that's not exactly backwards compatible either. > > Thinking about it some more, you can do it like this: > > bind -x $'"\\eX": \'"command with spaces" "x"\'' > > since bind -x allows single-quoted strings as the command to execute, > and $'...' allows backslash-escaped single quotes. > > If we ran the command string through rl_translate_keyseq, it would allow > backslash-escaped double quotes and strip the backslashes, but you get > the rest of the backslash processing that you probably don't want. > > It's just not transitive. Another issue I didn't think of with printing the unquoted translated command is that it can include newlines, which is a problem since you have to read the `bind -X' output one line at a time to reuse it with `bind -x'. If there isn't a backwards compatible way to produce output that is reusable given the current input format, I wonder if we can leverage a format that's not currently valid as input. `bind -x' currently requires a colon following the key sequence but we could change it to also allow input without it and use rl_macro_bind instead of rl_generic_bind if we get such input. If we have `bind -X' produce untranslated output as it did before, but without the `:', everything should match up and existing valid `bind -X' commands will be unaffected. --- diff --git a/bashline.c b/bashline.c index 5dac2e9e..9d99c536 100644 --- a/bashline.c +++ b/bashline.c @@ -4702,7 +4702,7 @@ bind_keyseq_to_unix_command (char *line) { Keymap kmap, cmd_xmap; char *kseq, *value; - int i, kstart; + int i, kstart, translate; kmap = rl_get_keymap (); @@ -4716,16 +4716,13 @@ bind_keyseq_to_unix_command (char *line) /* Create the key sequence string to pass to rl_generic_bind */ kseq = substring (line, kstart, i); - for ( ; line[i] && line[i] != ':'; i++) + /* Advance to the colon (:) or whitespace which separates the two objects. */ + for ( ; line[i] && line[i] != ':' && line[i] != ' ' && line[i] != '\t'; i++) ; - if (line[i] != ':') -{ - builtin_error (_("%s: missing colon separator"), line); - FREE (kseq); - return -1; -} - i = isolate_sequence (line, i + 1, 0, &kstart); + translate = (line[i] != ':'); + + i = isolate_sequence (line, i + 1, translate, &kstart); if (i < 0) { FREE (kseq); @@ -4737,7 +4734,10 @@ bind_keyseq_to_unix_command (char *line) /* Save the command to execute and the key sequence in the CMD_XMAP */ cmd_xmap = get_cmd_xmap_from_keymap (kmap); - rl_generic_bind (ISMACR, kseq, value, cmd_xmap); + if (translate) +rl_macro_bind (kseq, value, cmd_xmap); + else +rl_generic_bind (ISMACR, kseq, value, cmd_xmap); /* and bind the key sequence in the current keymap to a function that understands how to execute from CMD_XMAP */ diff --git a/lib/readline/bind.c b/lib/readline/bind.c index dc30dd84..9d4817a3 100644 --- a/lib/readline/bind.c +++ b/lib/readline/bind.c @@ -2861,18 +2861,12 @@ _rl_macro_dumper_internal (int print_readably, Keymap map, char *prefix) { case ISMACR: keyname = _rl_get_keyname (key); - if (print_readably < 0) - out = savestring ((char *)map[key].function); - else - out = _rl_untranslate_macro_value ((char *)map[key].function, 0); + out = _rl_untranslate_macro_value ((char *)map[key].function, 0); - if (print_readably < 0) - fprintf (rl_outstream, "\"%s%s\": %s\n", prefix ? prefix : "", -keyname, -out ? out : ""); - else if (print_readably > 0) - fprintf (rl_outstream, "\"%s%s\": \"%s\"\n", prefix ? prefix : "", + if (print_readably) + fprintf (rl_outstream, "\"%s%s\"%s \"%s\"\n", prefix ? prefix : "", keyname, +print_readably > 0 ? ":" : "", out ? out : ""); else fprintf (rl_outstream, "%s%s outputs %s\n", prefix ? prefix : "",