Re: enhancement merge request

2021-04-18 Thread Grisha Levit
On Sun, Apr 18, 2021, 7:13 PM Ananth Chellappa  wrote:

> Far as I understand, there is no
> way to accomplish what I want - concisely : *get a true private mode* (no
> logging to HISTFILE *OR* recall with history command after exiting
> private-mode (toggle of history using set -/+ o) *without sacrificing
> productivity*.
>

I think you can do something similar to the patch directly in the shell by
temporarily unsetting HISTFILE and then deleting history entries in a
certain range and restoring HISTFILE when you are ready, like:

hist_pause() {
[[ -z ${pause_histcmd-} ]] || return
pause_histcmd=$HISTCMD
old_histfile=$HISTFILE
history -a # in case we exit w/o hist_resume
HISTFILE=
}
hist_resume() {
[[ -n ${pause_histcmd-} ]] || return
local i
for ((i=HISTCMD; i>pause_histcmd; i--)); do
history -d $i
done
unset pause_histcmd
HISTFILE=$old_histfile
}

Or, alternatively, unset HISTFILE, write out the current history to a
temporary file (`history -w $file') and then later clear the history
(`history -c'), read in the temporary file (`history -r $file'), and
restore HISTFILE.

The latter approach might be slightly more robust but, note that the
history list will be re-numbered to start at 1 after restoring.

>


segfault on history-search-*

2023-02-28 Thread Grisha Levit
history-search-* commands segfault on the devel branch since the size_t
changes

---
 lib/readline/search.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/readline/search.c b/lib/readline/search.c
index b7be876f..525c9c69 100644
--- a/lib/readline/search.c
+++ b/lib/readline/search.c
@@ -621,7 +621,7 @@ rl_history_search_reinit (int flags)
   if (rl_point)
 {
   /* Allocate enough space for anchored and non-anchored searches */
-  if (_rl_history_search_len >= history_string_size - 2)
+  if (_rl_history_search_len + 2 >= history_string_size)
{
  history_string_size = _rl_history_search_len + 2;
  history_search_string = (char *)xrealloc (history_search_string,
history_string_size);


undo list free crash

2023-02-28 Thread Grisha Levit
After the changes in
https://git.savannah.gnu.org/cgit/bash.git/commit/?h=devel&id=9e3495c9,
I sometimes get segfaults when performing a bunch of history
navigations followed by ^C.

Small reproducer from some input fuzzing and debugger output below.


cat >/tmp/hist <<'EOF'
X
XX
EOF

cat >/tmp/brc <<'EOF'
bind -x '"\eI": kill -INT 0'
EOF

cat >/tmp/irc <<'EOF'
"\e[A": history-search-backward
set history-preserve-point on
set revert-all-at-newline on
EOF

HISTFILE=/tmp/hist INPUTRC=/tmp/irc "$BASH" --rcfile /tmp/brc -i < <(
printf 'XX'
printf '\e[D\e[D\e[C\e[B\e[D\e[A\e[D\e[A\e[B\e[C\e[D\e[A\e[D\e[A'
printf '\eI'
)


* thread #1, queue = 'com.apple.main-thread', stop reason =
EXC_BAD_ACCESS (code=1, address=0x3567a068f1b8)
frame #0: 0x0001045ce170 bash`rl_do_undo at undo.c:186:25
   183
   184   /* To better support vi-mode, a start or end value of -1 means
   185 rl_point, and a value of -2 means rl_end. */
-> 186   if (rl_undo_list->what == UNDO_DELETE ||
rl_undo_list->what == UNDO_INSERT)
   187 {
   188   start = TRANS (rl_undo_list->start);
   189   end = TRANS (rl_undo_list->end);
Target 0: (bash) stopped.
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason =
EXC_BAD_ACCESS (code=1, address=0x3567a068f1b8)
  * frame #0: 0x0001045ce170 bash`rl_do_undo at undo.c:186:25
frame #1: 0x0001045d82f0 bash`_rl_revert_previous_lines at misc.c:468:6
frame #2: 0x0001045d83a4 bash`_rl_revert_all_lines at misc.c:498:3
frame #3: 0x0001045a47bc
bash`readline_internal_teardown(eof=1) at readline.c:507:5
frame #4: 0x0001045a4468 bash`readline_internal at readline.c:740:11
frame #5: 0x0001045a4320 bash`readline(prompt="bash-5.2$ ") at
readline.c:387:11
frame #6: 0x0001044bf6b0 bash`yy_readline_get at parse.y:1564:31
frame #7: 0x0001044c58f0 bash`yy_getc at parse.y:1501:10
frame #8: 0x0001044c6290
bash`shell_getc(remove_quoted_newline=1) at parse.y:2396:8
frame #9: 0x0001044c4b48 bash`read_token(command=0) at parse.y:3436:23



global-buffer-overflow in parse.y

2023-03-02 Thread Grisha Levit
$ ./bash -c 'case x in x) if ((1)); then :; fi ;; esac'
parse.y:974:82: runtime error: index -1 out of bounds for type 'int[257]'
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior parse.y:974:82 in
=
==52960==ERROR: AddressSanitizer: global-buffer-overflow
READ of size 4 at 0x000100cf26dc thread T0
#0 0x1004b63c8 in yyparse parse.y:974

$ ./bash -c 'case x in x) if ((1)); then :; fi esac'
parse.y:979:82: runtime error: index -1 out of bounds for type 'int[257]'
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior parse.y:979:82 in
=
==52850==ERROR: AddressSanitizer: global-buffer-overflow
READ of size 4 at 0x000100b0e6dc thread T0
#0 0x1002d2808 in yyparse parse.y:979

both of these are like:

|CASE WORD newline_list IN case_clause ESAC
{
$$ = make_case_command ($2, $5, word_lineno[word_top]);
if (word_top >= 0) word_top--;
}

and word_top == -1



asan report in bash_add_history

2023-03-07 Thread Grisha Levit
./bash --norc -in <<<$'\\\n.'

bashhist.c:899:8: runtime error: addition of unsigned offset to
0x00010700d190 overflowed to 0x00010700d18f

ERROR: AddressSanitizer: heap-buffer-overflow on address
0x00010700d18f at pc 0x0001045fe1b8 bp 0x00016bb1f350 sp
0x00016bb1f348
READ of size 1 at 0x00010700d18f thread T0

frame #5: 0x0001045fe1b8 bash`bash_add_history(line=".") at
bashhist.c:899:8
frame #6: 0x0001045fd0c8 bash`maybe_add_history(line=".") at
bashhist.c:759:2
frame #7: 0x0001045fca34 bash`pre_process_line(line=".",
print_changes=1, addit=1) at bashhist.c:628:5
frame #8: 0x00010432df50
bash`shell_getc(remove_quoted_newline=1) at parse.y:2508:17
frame #9: 0x00010432786c bash`read_token(command=0) at parse.y:3432:23

(lldb) fr s 5
frame #5: 0x0001045fe1b8 bash`bash_add_history(line=".") at bashhist.c:899:8
   896   curlen = strlen (current->line);
   897
   898   if (dstack.delimiter_depth == 0 && current->line[curlen - 1] == '\\' &&
-> 899   current->line[curlen - 2] != '\\')
   900 {
   901   current->line[curlen - 1] = '\0';
   902   curlen--;

(lldb) fr v current->line curlen
(char *) current->line = 0x00010700d190 "\\"
(size_t) curlen = 1



asan report in spname

2023-03-07 Thread Grisha Levit
happens when attempting spell-correct-word on an empty line

./bash --norc -in <<<$'\030s'

ERROR: AddressSanitizer: heap-buffer-overflow on address
0x000102e0d0d1 at pc 0x0001004ccf64 bp 0x00016fdf0e30 sp
0x00016fdf0e28
READ of size 1 at 0x000102e0d0d1 thread T0

frame #5: 0x0001004ccf64 bash`spname(oldname="", newname="")
at spell.c:78:8
frame #6: 0x0001004cde0c bash`dirspell(dirname="") at spell.c:195:11
frame #7: 0x000100335a10
bash`bash_spell_correct_shellword(count=1, key=115) at
bashline.c:1348:16
frame #8: 0x0001004fe830 bash`_rl_dispatch_subseq(key=115,
map=0x0001007e0360, got_subseq=0) at readline.c:922:8
frame #9: 0x0001005009dc bash`_rl_dispatch_subseq(key=24,
map=0x0001007ddb20, got_subseq=0) at readline.c:1068:8
frame #10: 0x0001004fc434 bash`_rl_dispatch(key=24,
map=0x0001007ddb20) at readline.c:866:10

(lldb) fr s 5
frame #5: 0x0001004ccf64 bash`spname(oldname="", newname="") at spell.c:78:8
   75if (*op == '\0')/* Exact or corrected */
   76  {
   77/* `.' is rarely the right thing. */
-> 78if (oldname[1] == '\0' && newname[1] == '\0' &&
   79  oldname[0] != '.' && newname[0] == '.')
   80  return -1;
   81return strcmp(oldname, newname) != 0;

(lldb) fr v oldname
(char *) oldname = 0x000102e0d0d0 ""



asan report+fix in sh_mkdoublequoted

2023-03-07 Thread Grisha Levit
mkdir -p /tmp/bin
>'/tmp/bin/$'
chmod +x '/tmp/bin/$'
PATH=/tmp/bin ./bash --norc -in <<<$'\e*'

ERROR: AddressSanitizer: heap-buffer-overflow on address
0x0001039a9913 at pc 0x0001004d57b4 bp 0x00016fdf1350 sp
0x00016fdf1348
WRITE of size 1 at 0x0001039a9913 thread T0

frame #5: 0x0001004d57b4 bash`sh_mkdoublequoted(s="", slen=1,
flags=1) at shquote.c:211:6
frame #6: 0x0001003410e4 bash`bash_quote_filename(s="$",
rtype=1, qcp="") at bashline.c:4301:15
frame #7: 0x000100554b30
bash`make_quoted_replacement(match="$", mtype=1, qc="") at
complete.c:1797:16
frame #8: 0x000100549aec
bash`insert_all_matches(matches=0x000106600200, point=0, qc="") at
complete.c:1945:9
frame #9: 0x00010053c63c
bash`rl_complete_internal(what_to_do=42) at complete.c:2144:7
frame #10: 0x00010053d450 bash`rl_insert_completions(ignore=1,
invoking_key=42) at complete.c:466:11

frame #5: 0x0001004d57b4 bash`sh_mkdoublequoted(s="", slen=1,
flags=1) at shquote.c:211:6
   208   *r++ = *s++;
   209 }
   210   *r++ = '"';
-> 211   *r = '\0';
   212
   213   return ret;
   214 }

(lldb) fr v ret rlen
(char *) ret = 0x0001039a9910 "\"$\""
(size_t) rlen = 3


diff --git a/lib/sh/shquote.c b/lib/sh/shquote.c
index a27b9202..98b3d927 100644
--- a/lib/sh/shquote.c
+++ b/lib/sh/shquote.c
@@ -188,7 +188,7 @@ sh_mkdoublequoted (const char *s, size_t slen, int flags)

   send = s + slen;
   mb_cur_max = flags ? MB_CUR_MAX : 1;
-  rlen = (flags == 0) ? slen + 3 : (2 * slen) + 1;
+  rlen = (flags == 0) ? slen + 3 : (2 * slen) + 3;
   ret = r = (char *)xmalloc (rlen);

   *r++ = '"';



Re: command_not_found_handle not run when PATH is empty

2023-03-08 Thread Grisha Levit
On Wed, Mar 8, 2023 at 4:28 PM Moshe Looks  wrote:
> This is very old code and I have no idea what the original intention
> was in returning 'name' here or if doing so is still performing useful
> work, but the tests pass at least.

A null $PATH component is treated as the current directory, and bash
treats an empty $PATH as having only a single null component, so we
can't just fail to return a command when $PATH is empty.

The current code's behavior of returning the path as provided when
$PATH is empty accomplishes the goal of "finding" the path in $PATH
but ignores any flags that would normally limit the results to files
that exist/are executable/etc.

This leads to weirdness not just with command_not_found_handle but
with command name completion and `type -a' and `command output as
well.

$ cd /bin
$ PATH= compgen -c -X '!bash'
$ PATH= type -a bash
-bash: type: bash: not found

I think it might make sense to change code that looks at the value of
PATH to explicitly treat an empty value as `.' so that all such
behavior is consistent.

diff --git a/bashline.c b/bashline.c
index 3238b13a..e0d6107c 100644
--- a/bashline.c
+++ b/bashline.c
@@ -2063,6 +2063,8 @@ command_word_completion_function (const char
*hint_text, int state)
  dequoted_hint = bash_dequote_filename (hint, 0);

   path = get_string_value ("PATH");
+  if (path == 0 || *path == '\0')
+ path = ".";
   path_index = dot_in_path = 0;

   /* Initialize the variables for each type of command word. */
diff --git a/findcmd.c b/findcmd.c
index 186871ac..b413c870 100644
--- a/findcmd.c
+++ b/findcmd.c
@@ -256,13 +256,13 @@ _find_user_command_internal (const char *name, int flags)

   /* Search for the value of PATH in both the temporary environments and
  in the regular list of variables. */
-  if (var = find_variable_tempenv ("PATH")) /* XXX could be array? */
-path_list = value_cell (var);
+  if (var = find_variable_tempenv ("PATH"))
+path_list = get_variable_value (var);
   else
-path_list = (char *)NULL;
+path_list = 0;

   if (path_list == 0 || *path_list == '\0')
-return (savestring (name));
+path_list = ".";

   cmd = find_user_command_in_path (name, path_list, flags, (int *)0);

@@ -363,11 +363,14 @@ search_for_command (const char *pathname, int flags)
 {
   if (flags & CMDSRCH_STDPATH)
  path_list = conf_standard_path ();
-  else if (temp_path || path)
- path_list = value_cell (path);
+  else if (path)
+ path_list = get_variable_value (path);
   else
  path_list = 0;

+  if (path_list == 0 || *path_list == '\0')
+ path_list = ".";
+
   command = find_user_command_in_path (pathname, path_list,
FS_EXEC_PREFERRED|FS_NODIRS, &st);

   if (command && hashing_enabled && temp_path == 0 && (flags &
CMDSRCH_HASH))
@@ -440,7 +443,9 @@ user_command_matches (const char *name, int flags,
int state)
if (stat (".", &dotinfo) < 0)
  dotinfo.st_dev = dotinfo.st_ino = 0; /* so same_file won't match */
path_list = get_string_value ("PATH");
-path_index = 0;
+   if (path_list == 0 || *path_list == '\0')
+ path_list = ".";
+   path_index = 0;
  }

   while (path_list && path_list[path_index])



use-after-free in read_token_word

2023-03-15 Thread Grisha Levit
./bash --norc -O noexpand_translation -in <<<'$":"'

=ERROR: AddressSanitizer: heap-use-after-free on address 0x000108102b40
READ of size 1 thread T0
#0 read_token_word parse.y:5236
#1 read_token parse.y:3618

freed by thread T0 here:
#1 read_token_word parse.y:5231
#2 read_token parse.y:3618


diff --git a/parse.y b/parse.y
index e3516e2d..0a8c039a 100644
--- a/parse.y
+++ b/parse.y
@@ -5228,15 +5228,19 @@ read_token_word (int character)
/* PST_NOEXPAND */
/* Try to locale-expand the converted string. */
ttrans = locale_expand (ttok, 0, ttoklen - 1, first_line, &ttranslen);
-   free (ttok);
-
/* Add the double quotes back (or single quotes if the user
   has set that option). */
if (singlequote_translations &&
  ((ttoklen - 1) != ttranslen || STREQN (ttok, ttrans, ttranslen) == 0))
- ttok = sh_single_quote (ttrans);
+ {
+   free (ttok);
+   ttok = sh_single_quote (ttrans);
+ }
else
- ttok = sh_mkdoublequoted (ttrans, ttranslen, 0);
+ {
+   free (ttok);
+   ttok = sh_mkdoublequoted (ttrans, ttranslen, 0);
+ }

free (ttrans);
ttrans = ttok;



Re: use-after-free in read_token_word

2023-03-15 Thread Grisha Levit
Also in parse_matched_pair:

diff --git a/parse.y b/parse.y
index 0a8c039a..1001ac1b 100644
--- a/parse.y
+++ b/parse.y
@@ -3906,14 +3906,13 @@ parse_matched_pair (int qc, int open, int
close, size_t *lenp, int flags)
/* Locale expand $"..." here. */
/* PST_NOEXPAND */
ttrans = locale_expand (nestret, 0, nestlen - 1, start_lineno, &ttranslen);
-   free (nestret);
-
/* If we're supposed to single-quote translated strings,
   check whether the translated result is different from
   the original and single-quote the string if it is. */
if (singlequote_translations &&
  ((nestlen - 1) != ttranslen || STREQN (nestret, ttrans,
ttranslen) == 0))
  {
+   free (nestret);
if ((rflags & P_DQUOTE) == 0)
  nestret = sh_single_quote (ttrans);
else if ((rflags & P_DQUOTE) && (dolbrace_state ==
DOLBRACE_QUOTE2) && (flags & P_DOLBRACE))
@@ -3923,7 +3922,10 @@ parse_matched_pair (int qc, int open, int
close, size_t *lenp, int flags)
  nestret = sh_backslash_quote_for_double_quotes (ttrans, 0);
  }
else
- nestret = sh_mkdoublequoted (ttrans, ttranslen, 0);
+ {
+   free (nestret);
+   nestret = sh_mkdoublequoted (ttrans, ttranslen, 0);
+ }
free (ttrans);
nestlen = strlen (nestret);
retind -= 2; /* back up before the $" */



double-free in bashline.c

2023-03-16 Thread Grisha Levit
A few functions in bashline.c free static variables but do not assign
to them until after calling bash_tilde_expand, which may
throw_to_top_level.  If SIGINT is received at an inopportune time,
these variables may be free-d again.

diff --git a/bashline.c b/bashline.c
index 2745c4dd..b5c0a49f 100644
--- a/bashline.c
+++ b/bashline.c
@@ -1970,6 +1970,7 @@ command_word_completion_function (const char
*hint_text, int state)
free (dequoted_hint);
   if (hint)
free (hint);
+  dequoted_hint = hint = (char *)NULL;

   mapping_over = searching_path = 0;
   hint_is_dir = CMD_IS_DIR (hint_text);
@@ -2252,6 +2253,7 @@ globword:
free (fnhint);
   if (filename_hint)
free (filename_hint);
+  fnhint = filename_hint = (char *)NULL;

   filename_hint = sh_makepath (current_path, hint, 0);
   /* Need a quoted version (though it doesn't matter much in most
@@ -2397,7 +2399,10 @@ command_subst_completion_function (const char
*text, int state)
   start_len = text - orig_start;
   filename_text = savestring (text);
   if (matches)
-   free (matches);
+   {
+ free (matches);
+ matches = (char **)NULL;
+   }

   /*
* At this point we can entertain the idea of re-parsing
@@ -3873,9 +3878,11 @@ glob_complete_word (const char *text, int state)
 {
   rl_filename_completion_desired = 1;
   FREE (matches);
+  matches = (char **)NULL;
   if (globorig != globtext)
FREE (globorig);
   FREE (globtext);
+  globorig = globtext = (char *)NULL;

   ttext = bash_tilde_expand (text, 0);



rl_filename_quoting_function restoration

2023-03-16 Thread Grisha Levit
bash_glob_complete_word modifies rl_filename_quoting_function, which
can fail to be restored if bash_tilde_expand handles a SIGINT while
glob-complete-word is running

Handling the restore in bashline_reset() seems to solve the issue:

diff --git a/bashline.c b/bashline.c
index 2745c4dd..7c3812eb 100644
--- a/bashline.c
+++ b/bashline.c
@@ -685,6 +685,7 @@ bashline_reset (void)
   complete_fullquote = 1;
   rl_filename_quote_characters = default_filename_quote_characters;
   set_filename_bstab (rl_filename_quote_characters);
+  rl_filename_quoting_function = bash_quote_filename;

   set_directory_hook ();
   rl_filename_stat_hook = bash_filename_stat_hook;



Re: global-buffer-overflow in parse.y

2023-03-16 Thread Grisha Levit
On Mon, Mar 6, 2023 at 9:16 AM Chet Ramey  wrote:
> Thanks for the report. It's the specific combination of `if' and the `(('
> command that causes the problem.

Looks like same thing also happens when `if' is followed by a newline

./bash -c $'case $LINENO in 0) if\n:; then echo FAIL; fi esac'
bash/parse.y:980:82: runtime error: index -1 out of bounds for type 'int[257]'
#0 0x100759bcc in yyparse parse.y:980



asan report in extmatch

2023-03-16 Thread Grisha Levit
The relevant code was added in
https://git.savannah.gnu.org/cgit/bash.git/commit/?id=da43077 with
similar additions to both gmatch and extmatch, but I suspect the test
on line 912 was not meant to be in extmatch:

> .a
bash -O extglob -O dotglob -c ': ./!(.foo)'

ERROR: AddressSanitizer: heap-buffer-overflow on address 0x000102e02daf
READ of size 1 at 0x000102e02daf thread T0
#0 extmatch sm_loop.c:912

frame #5: bash`extmatch(xc=33, s=".a", se="", p="(.foo)", pe="",
flags=161) at sm_loop.c:912:36
   909
   910   if (m1 == 0 && (flags & FNM_DOTDOT) &&
   911   (SDOT_OR_DOTDOT (s) ||
-> 912((flags & FNM_PATHNAME) && s[-1] == L('/') && PDOT_OR_DOTDOT(s
   913 return (FNM_NOMATCH);
   914
   915   /* if srest > s, we are not at start of string */



EXIT trap definition order

2023-03-17 Thread Grisha Levit
It seems that if a trap handler for a terminating signal resends its
own signal (after resetting the signal disposition), any configured
EXIT trap will be executed (as I think is expected), but only if an
EXIT trap had already been set prior to the the first instance of a
trap having been set for the terminating signal in question.

That is, this script will print "EXIT":

trap 'echo EXIT' EXIT
trap 'trap TERM; kill 0' TERM
kill 0

Whereas this one will not:

trap 'trap TERM; kill 0' TERM
trap 'echo EXIT' EXIT
kill 0



compgen respecting dotglob changes

2023-03-19 Thread Grisha Levit
compgen's glob matching does not respect dotglob being turned on or
off unless some other globbing takes place between the setting being
changed and compgen being called:

$ cd "$(mktemp -d)"
$ > .x
$ bash -c 'shopt -s dotglob; compgen -G \*'
$ bash -c 'shopt -s dotglob; : *; compgen -G \*'
.x



history file missing timestamp when HISTFILESIZE reached

2023-03-19 Thread Grisha Levit
When HISTTIMEFORMAT is set and history file truncation is performed,
the first line of the history file (i.e. the timestamp of the first
entry) seems to always be missing

> /tmp/hist
HISTTIMEFORMAT= HISTFILESIZE=3 HISTFILE=/tmp/hist bash --norc -in <<<$'1\n2\n3'

$ cat /tmp/hist
1
#1679274410
2
#1679274410
3



parsing command substitution inside parameter expansion in interactive shell

2023-03-19 Thread Grisha Levit
If a command substitution inside a parameter expansion has a command
followed by a newline, bash prints an error message (though the
command is parsed and saved in the history list correctly):

bash --norc -in <<<$'${_+$(:\n)}\n!!'
$ ${_+$(:
bash: command substitution: line 3: unexpected EOF while looking for
matching `)'
> )}
$ !!
${_+$(:; )}

If the command substitution starts with a newline, no error is printed
but the command is _not_ saved correctly to the history list (a
semicolon is inserted at the start):

bash --norc -in <<<$'${_+$(\n:)}\n!!'
$ ${_+$(
> :)}
$ !!
${_+$(; :)}



[PATCH] Save more readline state when running compgen

2023-03-19 Thread Grisha Levit
Completion state is not fully restored after invoking `compgen' within
a competition function.

Normally, if a compspec does not specifically include one of the
options that triggers filename completion, the generated completions
are not treated as filenames:

$ complete -W '/tmp /var' cmd
$ cmd /[TAB]
/tmp /var

However, if a completion function is invoked, and that function
happens to call `compgen' with one of the options that triggers
filename completion, the generated completions _are_ treated as
filenames (i.e. get quoted, have `/' appended to directories, etc.)

$ fun() { compgen -f >/dev/null; }
$ complete -W '/tmp /var' -F fun cmd
$ cmd /[TAB]
tmp/ var/



diff --git a/builtins/complete.def b/builtins/complete.def
index 0d9b9293..37eb20ba 100644
--- a/builtins/complete.def
+++ b/builtins/complete.def
@@ -650,7 +650,7 @@ compgen_builtin (WORD_LIST *list)
   STRINGLIST *sl;
   char *word, **matches;
   char *old_line;
-  int old_ind;
+  int old_ind, old_completion, old_quoting, old_suppress;

   if (list == 0)
 return (EXECUTION_SUCCESS);
@@ -692,6 +692,10 @@ compgen_builtin (WORD_LIST *list)

   rval = EXECUTION_FAILURE;

+  old_completion = rl_filename_completion_desired;
+  old_quoting = rl_filename_quoting_desired;
+  old_suppress = rl_completion_suppress_append;
+
   /* probably don't have to save these, just being safe */
   old_line = pcomp_line;
   old_ind = pcomp_ind;
@@ -720,6 +724,10 @@ compgen_builtin (WORD_LIST *list)
   strvec_dispose (matches);
 }

+  rl_filename_completion_desired = old_completion;
+  rl_filename_quoting_desired = old_quoting;
+  rl_completion_suppress_append = old_suppress;
+
   if (sl)
 {
   if (sl->list && sl->list_len)



wait on procsub in EXIT trap

2023-03-19 Thread Grisha Levit
If an EXIT trap is executed after receipt of a terminating signal,
waiting on a process substitution within the trap can fail:

$ (trap 'wait $!; echo $?' EXIT; : <(:); kill 0)
-bash: wait: pid 83694 is not a child of this shell
127

Interestingly, if an external command or a subshell is executed after
the process substitution is started but prior to receipt of the
signal, the `wait' works fine:

$ (trap 'wait $!; echo $?' EXIT; : <(:); (:); kill 0)
0



Re: EXIT trap definition order

2023-03-20 Thread Grisha Levit
On Mon, Mar 20, 2023 at 10:57 AM Chet Ramey  wrote:
>
> On 3/17/23 1:28 PM, Grisha Levit wrote:
> > It seems that if a trap handler for a terminating signal resends its
> > own signal (after resetting the signal disposition), any configured
> > EXIT trap will be executed (as I think is expected), but only if an
> > EXIT trap had already been set prior to the the first instance of a
> > trap having been set for the terminating signal in question.
>
> If the shell is handling a terminating signal and receives another instance
> of that signal while doing so, it takes that as an indication to exit
> immediately.

Thanks, that makes sense. But then the first script should _not_ print
EXIT, right?

i.e. if both TERM and EXIT traps are defined, it shouldn't matter
which one happened to be defined first.



Re: wait on procsub in EXIT trap

2023-03-21 Thread Grisha Levit
On Tue, Mar 21, 2023 at 3:28 PM Chet Ramey  wrote:
>
> > Interestingly, if an external command or a subshell is executed after
> > the process substitution is started but prior to receipt of the
> > signal, the `wait' works fine:
> >
> > $ (trap 'wait $!; echo $?' EXIT; : <(:); (:); kill 0)
>
> Because the procsub gets reaped before the terminating signal arrives.

Oh I see, thanks.

So to wait for a procsub after receiving a terminating signal, one
must do so in a trap for the signal itself and not in an EXIT trap.



Re: [PATCH] Save more readline state when running compgen

2023-03-21 Thread Grisha Levit
On Tue, Mar 21, 2023 at 3:47 PM Chet Ramey  wrote:
> OK, say you did in fact run compgen in the foreground -- very unusual
> because it's awkward to capture the possible completions that way -- to
> generate completions.

Sorry I should have explained. I hit this issue because I was using
compgen in an unorthodox way, just to check if some files matching a
glob exist. Basically something like

compgen -G 'foo/*' >/dev/null && COMPREPLY=(bar)

This can be a lot faster than expanding the glob into an array and
then testing that the array is not empty.



Re: [PATCH] Save more readline state when running compgen

2023-03-21 Thread Grisha Levit
On Tue, Mar 21, 2023 at 4:11 PM Chet Ramey  wrote:
> OK, which do you think would be the more common case? Wanting the options
> used to generate completions to persist or using it in this way?

Usually people do `COMPREPLY=($(compgen ...))' (or write to stdout in
a command specified with `complete -C') so nothing persists anyway,
right?

I don't know if there's really any use case that generates completions
to place into COMPREPLY by running compgen in the foreground so it's
hard for me to imagine a use case that benefits from the readline
state being modified.



Re: [PATCH] Save more readline state when running compgen

2023-03-21 Thread Grisha Levit
On Tue, Mar 21, 2023 at 5:26 PM alex xmb ratchev  wrote:
> On Tue, Mar 21, 2023, 21:05 Grisha Levit  wrote:
>>
>> compgen -G 'foo/*' >/dev/null && COMPREPLY=(bar)
>
> i dont get that code at all , but i like idea of speedier file filling .. can 
> u explain some in short ?

Let's say you want to know if there are any entries starting with
`foo' in the current directory.  You can do:

GLOBIGNORE=
shopt -s nullglob
set +o noglob
tmp=(foo*)
if (( ${#tmp[@]} )); then ...

Or you can do

if compgen -G 'foo*' >/dev/null; then ...

The latter doesn't care about nullglob / noglob / GLOBIGNORE and if
the number of files is large can be a lot faster.

P.S. If you are interested in files that may or may not start with a
dot, you do need to set dotglob in either case, which was the
motivation for https://lists.gnu.org/archive/html/bug-bash/2023-03/msg00062.html



Re: [PATCH] Save more readline state when running compgen

2023-03-21 Thread Grisha Levit
On Tue, Mar 21, 2023 at 5:52 PM alex xmb ratchev  wrote:
> On Tue, Mar 21, 2023, 22:42 Grisha Levit  wrote:
>> Let's say you want to know if there are any entries starting with
>> `foo' in the current directory.  You can do:
>
> i see , thank you sir
> i .. mostly dont have that case ( if existing n no more )
> i always along the paths , to be used

The most common use case would probably be to see if a directory is
empty or not, like:

compgen -G 'dir/@(*|.*)' >/dev/null || echo 'dir is empty'

I don't see it mentioned in https://mywiki.wooledge.org/BashFAQ/004
but I think it's nice because you don't need subshells and don't have
to fiddle with shopt, etc.



Re: parsing command substitution inside parameter expansion in interactive shell

2023-03-25 Thread Grisha Levit
On Mon, Mar 20, 2023 at 4:59 PM Chet Ramey  wrote:
> Thanks, it's an easy fix to preserve the newline here.

FWIW even with the latest fixes, this kind of nesting in a history
entry still triggers ASAN:

bash --norc -in <<<$'${_+$(\n \cP\en '

ERROR: AddressSanitizer: heap-buffer-overflow
READ of size 1 at 0x000105c0c2cf thread T0
#0 bash_add_history bashhist.c:898
#1 maybe_add_history bashhist.c:759
#2 pre_process_line bashhist.c:628
#3 shell_getc parse.y:2501
#4 read_token parse.y:3425
#5 yylex parse.y:2915
#6 yyparse y.tab.c:1869
#7 parse_comsub parse.y:4306
#8 parse_matched_pair parse.y:3973
#9 read_token_word parse.y:5172
#10 read_token parse.y:3621
#11 yylex parse.y:2915
#12 yyparse y.tab.c:1869
#13 parse_command eval.c:345
#14 read_command eval.c:389
#15 reader_loop eval.c:139
#16 main shell.c:821

Avoided by:

diff --git a/bashhist.c b/bashhist.c
index 21c77058..b48306c2 100644
--- a/bashhist.c
+++ b/bashhist.c
@@ -895,7 +895,7 @@ bash_add_history (char *line)
 newline, since that is what happens when the line is parsed. */
  curlen = strlen (current->line);

- if (dstack.delimiter_depth == 0 && current->line[curlen - 1]
== '\\' &&
+ if (curlen && dstack.delimiter_depth == 0 &&
current->line[curlen - 1] == '\\' &&
  (curlen < 2 || current->line[curlen - 2] != '\\'))
{
  current->line[curlen - 1] = '\0';



bash --norc -in <<<$'${_+$(\n\e.'

ERROR: AddressSanitizer: heap-buffer-overflow
READ of size 1 at 0x00010870c517 thread T0
#0 history_tokenize_word histexpand.c:1561
#1 history_tokenize_internal histexpand.c:1660
#2 history_tokenize histexpand.c:1693
#3 history_arg_extract histexpand.c:1435
#4 rl_yank_nth_arg_internal kill.c:628
#5 rl_yank_last_arg kill.c:698
#6 _rl_dispatch_subseq readline.c:922

Avoided by:

diff --git a/lib/readline/histexpand.c b/lib/readline/histexpand.c
index d21240bf..8a147e40 100644
--- a/lib/readline/histexpand.c
+++ b/lib/readline/histexpand.c
@@ -1603,6 +1603,8 @@ get_word:
  delimopen = '(';
  delimiter = ')';
  nestdelim = 1;
+  if (string[i] == 0)
+break;
  continue;
}



segfault in hostnames_matching

2023-03-27 Thread Grisha Levit
another size_t issue

diff --git a/bashline.c b/bashline.c
index 0047caef..9df26d2e 100644
--- a/bashline.c
+++ b/bashline.c
@@ -919,7 +919,7 @@ hostnames_matching (const char *text)
continue;

   /* OK, it matches.  Add it to the list. */
-  if (nmatch >= (rsize - 1))
+  if ((nmatch + 1) >= rsize)
{
  rsize = (rsize + 16) - (rsize % 16);
  result = strvec_resize (result, rsize);



size_t issue in expand_string_dollar_quote

2023-03-29 Thread Grisha Levit
bash --norc -in <<<$'"\e\cE'

ERROR: AddressSanitizer: negative-size-param: (size=-1)
#0 wrap_strncpy+0x228
#1 expand_string_dollar_quote subst.c:4108
#2 shell_expand_line bashline.c:2887

probably not the cleanest fix but the issue is here:

diff --git a/subst.c b/subst.c
index 2ff9b7c2..35c0fdd1 100644
--- a/subst.c
+++ b/subst.c
@@ -4100,7 +4100,7 @@ expand_string_dollar_quote (const char *string, int flags)
news = skip_single_quoted (string, slen, ++sindex, SX_COMPLETE);
  else
news = skip_double_quoted (string, slen, ++sindex, SX_COMPLETE);
- translen = news - sindex - 1;
+ translen = (news > sindex) ? news - sindex - 1 : 0;
  RESIZE_MALLOCED_BUFFER (ret, retind, translen + 3, retsize, 64);
  ret[retind++] = c;
  if (translen > 0)



[PATCH] fix bcopy params

2023-04-01 Thread Grisha Levit
---
 lib/sh/oslib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/sh/oslib.c b/lib/sh/oslib.c
index ab7924df..85f7b491 100644 --- a/lib/sh/oslib.c
+++ b/lib/sh/oslib.c @@ -161,7 +161,7 @@ getdtablesize (void) # undef bcopy
 # endif void
-bcopy (void *s, *d, size_t n)
+bcopy (void *s, void *d, size_t n) {
   FASTCOPY (s, d, n); }
--
2.40.0


bug-bash@gnu.org

2023-04-06 Thread Grisha Levit
If $var is non-empty and not a valid a number, {var}>&- silently closes fd
0.

var=x; (exec {var}>&-; test -e /dev/fd/0); echo $?
1

Seems like the test of the return value of legal_number is incorrect.
---
 redir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/redir.c b/redir.c
index 804d9b82..267a9fc8 100644
--- a/redir.c
+++ b/redir.c
@@ -1459,7 +1459,7 @@ redir_varvalue (REDIRECT *redir)
   if (val == 0 || *val == 0)
 return -1;

- if (legal_number (val, &vmax) < 0)
+ if (legal_number (val, &vmax) <= 0)
 return -1;

   i = vmax; /* integer truncation */
-- 
2.40.0


parameter expansion assignment to array index nameref

2023-04-06 Thread Grisha Levit
$ declare -n ref=arr[1]
$ arr=(A); (echo "${ref=X} ${ref}")
A X
$ arr=( ); (echo "${ref=X} ${ref}")
Segmentation fault: 11

Expanding the nameref while still in parameter_brace_expand seems to do the
right thing though I can't say I'm fully confident in this.

diff --git a/subst.c b/subst.c
index 555d18db..64f4d6da 100644
--- a/subst.c
+++ b/subst.c
@@ -9260,6 +9260,7 @@ parameter_brace_expand (char *string, int *indexp,
int quoted, int pflags, int *
   int want_substring, want_indir, want_patsub, want_casemod,
want_attributes;
   char *name, *value, *temp, *temp1;
   WORD_DESC *tdesc, *ret;
+  SHELL_VAR *var;
   int t_index, sindex, c, tflag, modspec, local_pflags,
all_element_arrayref;
   intmax_t number;
   array_eltstate_t es;
@@ -9583,6 +9584,13 @@ parameter_brace_expand (char *string, int *indexp,
int quoted, int pflags, int *
   return (temp == &expand_param_error ? &expand_wdesc_error :
&expand_wdesc_fatal);
 }

+  if ((var = find_variable_last_nameref (name, 0)) && nameref_p (var) &&
+  (temp1 = nameref_cell (var)) && *temp1)
+{
+  FREE (name);
+  name = savestring (temp1);
+}
+
 #if defined (ARRAY_VARS)
   if (valid_array_reference (name, 0))
 {


[PATCH] compgen -V option (store output in array)

2023-04-13 Thread Grisha Levit
Since the predominant use case for compgen is  generating output that then
gets split back up into an array, it seems like it would be nice to have an
option that avoids the extra steps (and their associated pitfalls)
From e75306bd43d4ed233a7fedddb060a6bdd9305fdc Mon Sep 17 00:00:00 2001
From: Grisha Levit 
Date: Thu, 13 Apr 2023 04:14:18 -0400
Subject: [PATCH] compgen -V option

---
 builtins/complete.def| 72 
 externs.h|  1 +
 lib/readline/doc/rluser.texi | 11 --
 3 files changed, 64 insertions(+), 20 deletions(-)

diff --git a/builtins/complete.def b/builtins/complete.def
index 881c4711..0c68aec1 100644
--- a/builtins/complete.def
+++ b/builtins/complete.def
@@ -87,7 +87,7 @@ struct _optflags {
 static int find_compact (char *);
 static int find_compopt (char *);
 
-static int build_actions (WORD_LIST *, struct _optflags *, unsigned long *, unsigned long *);
+static int build_actions (WORD_LIST *, struct _optflags *, unsigned long *, unsigned long *, char **);
 
 static int remove_cmd_completions (WORD_LIST *);
 
@@ -177,9 +177,10 @@ find_compopt (char *name)
 /* Build the actions and compspec options from the options specified in LIST.
ACTP is a pointer to an unsigned long in which to place the bitmap of
actions.  OPTP is a pointer to an unsigned long in which to place the
-   bitmap of compspec options (arguments to `-o').  PP, if non-null, gets 1
-   if -p is supplied; RP, if non-null, gets 1 if -r is supplied.
-   If either is null, the corresponding option generates an error.
+   bitmap of compspec options (arguments to `-o').  FLAGP is a pointer to a
+   struct that stores non-action binary options.  VNAMEP is a pointer to a
+   string to store the name of the output variable.
+   If any of the above is null, the corresponding option generates an error.
This also sets variables corresponding to options that take arguments as
a side effect; the caller should ensure that those variables are set to
NULL before calling build_actions.  Return value:
@@ -189,7 +190,7 @@ find_compopt (char *name)
 */
 
 static int
-build_actions (WORD_LIST *list, struct _optflags *flagp, unsigned long *actp, unsigned long *optp)
+build_actions (WORD_LIST *list, struct _optflags *flagp, unsigned long *actp, unsigned long *optp, char **vnamep)
 {
   int opt, ind, opt_given;
   unsigned long acts, copts;
@@ -199,7 +200,7 @@ build_actions (WORD_LIST *list, struct _optflags *flagp, unsigned long *actp, un
   opt_given = 0;
 
   reset_internal_getopt ();
-  while ((opt = internal_getopt (list, "abcdefgjko:prsuvA:G:W:P:S:X:F:C:DEI")) != -1)
+  while ((opt = internal_getopt (list, "abcdefgjko:prsuvA:G:W:P:S:X:F:C:V:DEI")) != -1)
 {
   opt_given = 1;
   switch (opt)
@@ -341,6 +342,25 @@ build_actions (WORD_LIST *list, struct _optflags *flagp, unsigned long *actp, un
 	case 'S':
 	  Sarg = list_optarg;
 	  break;
+case 'V':
+#if defined (ARRAY_VARS)
+	  if (vnamep)
+	{
+	  *vnamep = list_optarg;
+	  if (legal_identifier (*vnamep) == 0)
+		{
+		  sh_invalidid (*vnamep);
+		  return (EX_USAGE);
+		}
+	}
+	  else
+#endif
+	{
+	  sh_invalidopt ("-V");
+	  builtin_usage ();
+	  return (EX_USAGE);
+	}
+  break;
 	case 'W':
 	  Warg = list_optarg;
 	  break;
@@ -385,7 +405,7 @@ complete_builtin (WORD_LIST *list)
 
   /* Build the actions from the arguments.  Also sets the [A-Z]arg variables
  as a side effect if they are supplied as options. */
-  rval = build_actions (list, &oflags, &acts, &copts);
+  rval = build_actions (list, &oflags, &acts, &copts, (char **)NULL);
   if (rval == EX_USAGE)
 return (rval);
   opt_given = rval != EXECUTION_FAILURE;
@@ -630,12 +650,13 @@ print_cmd_completions (WORD_LIST *list)
 $BUILTIN compgen
 $DEPENDS_ON PROGRAMMABLE_COMPLETION
 $FUNCTION compgen_builtin
-$SHORT_DOC compgen [-abcdefgjksuv] [-o option] [-A action] [-G globpat] [-W wordlist] [-F function] [-C command] [-X filterpat] [-P prefix] [-S suffix] [word]
+$SHORT_DOC compgen [-abcdefgjksuv] [-o option] [-A action] [-G globpat] [-W wordlist] [-F function] [-C command] [-X filterpat] [-P prefix] [-S suffix] [-V var] [word]
 Display possible completions depending on the options.
 
 Intended to be used from within a shell function generating possible
 completions.  If the optional WORD argument is supplied, matches against
-WORD are generated.
+WORD are generated.  With the -V option, completions are stored in the
+indexed array VAR.
 
 Exit Status:
 Returns success unless an invalid option is supplied or an error occurs.
@@ -651,17 +672,21 @@ compgen_builtin (WORD_LIST *list)
   char *word, **matches;
   char *old_line;
   int old_ind, old_completion, old_quoting, old_suppress;
+  char *varname;
+  SHELL_VAR *var;
+  WORD_LIST *alist;
 
   if (list == 0)
 return (EXECUTION_SUCCESS);
 
   acts

[PATCH] support getconf builtin build without confstr

2023-04-13 Thread Grisha Levit
Currently, the getconf build fails on platforms (e.g. Android) without
confstr(3)
From c1a616fd5ff18a202ad934bbb23ba4d58685 Mon Sep 17 00:00:00 2001
From: Grisha Levit 
Date: Thu, 13 Apr 2023 04:32:04 -0400
Subject: [PATCH] support getconf builtin build without confstr

---
 examples/loadables/getconf.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/examples/loadables/getconf.c b/examples/loadables/getconf.c
index 0dc213be..cd89b82f 100644
--- a/examples/loadables/getconf.c
+++ b/examples/loadables/getconf.c
@@ -47,7 +47,15 @@ struct conf
   {
 const char *name;
 const long call_name;	/* or value for CONSTANT */
-const enum { SYSCONF, CONFSTR, PATHCONF, CONSTANT, UNDEFINED } call;
+const enum {
+SYSCONF,
+#if HAVE_CONFSTR
+CONFSTR,
+#endif
+PATHCONF,
+CONSTANT,
+UNDEFINED
+} call;
   };
 
 static const struct conf vars[] =
@@ -489,8 +497,10 @@ static const struct conf vars[] =
 { "POSIX2_UPE", _SC_2_UPE, SYSCONF },
 { "POSIX2_VERSION", _SC_2_VERSION, SYSCONF },
 
+#ifdef _CS_PATH
 { "PATH", _CS_PATH, CONFSTR },
 { "CS_PATH", _CS_PATH, CONFSTR },
+#endif
 
 /* LFS */
 #ifdef _CS_LFS_CFLAGS
@@ -1072,6 +1082,7 @@ getconf_print (const struct conf *c, const char *vpath, int all)
 	printf ("%ld\n", value);
   return (EXECUTION_SUCCESS);
 
+#if HAVE_CONFSTR
 case CONFSTR:
   errno = 0;
   clen = confstr (cn, (char *) NULL, 0);
@@ -1092,6 +1103,7 @@ getconf_print (const struct conf *c, const char *vpath, int all)
 	printf ("%.*s\n", (int) clen, cvalue);
   free (cvalue);
   return (EXECUTION_SUCCESS);
+#endif
 
 case CONSTANT:
   return (getconf_internal (c, all));
-- 
2.40.0



[PATCH] allow quoting completions w/o filename treatment

2023-04-14 Thread Grisha Levit
Currently, there isn't any way to request quoting of completion matches
without getting also the attendant filename treatment.

In practice, authors of completion functions that need the matches to be
quoted often use `-o filenames', which works fine unless the matches
include slashes or happen to match existing directory names.

Alternatively, some authors opt to do their own quoting of matches but
doing so properly for partial word completion is tricky as it requires
accounting for the quoting style chosen by the user when they typed the
partial word.

The attached patch hopefully addresses these difficulties by allowing for
the decoupling of filename-specific match handling from match quoting.

It adds a new completion option `allquote' (I'm not quite happy with the
name) which controls the corresponding new readline variable
rl_all_quoting_desired.
From 8049e9f920fde4f84422fd651ba4a4beeafb7f10 Mon Sep 17 00:00:00 2001
From: Grisha Levit 
Date: Fri, 14 Apr 2023 16:10:22 -0400
Subject: [PATCH] allow quoting completions w/o filename treatment

lib/readline/complete.c
- rl_all_quoting_desired: new variable to tell readline to quote
  matches even if not doing filename completion
- QUOTING_DESIRED: new macro for checking if quoting is enabled
- change checks of filename comoletion && filename quoting to use
  QUOTING_DESIRED.
- set_completion_defaults: set rl_all_quoting_desired to 0

lib/readline/readline.h
- rl_all_quoting_desired: extern declaration

lib/readline/doc/rltech.texi
- document rl_all_quoting_desired

pcomplete.h
- COPT_ALLQUOTE: new value for compspec option, corresponds to
  rl_all_quoting_desired

pcomplete.c
- pcomp_set_readline_variables: handle COPT_ALLQUOTE

builtins/complete.def
- compopts: add allquote option for COPT_ALLQUOTE

lib/readline/doc/rluser.texi
- document allquote compopt
---
 builtins/complete.def|  1 +
 lib/readline/complete.c  | 26 +-
 lib/readline/doc/rltech.texi |  7 +++
 lib/readline/doc/rluser.texi |  3 +++
 lib/readline/readline.h  |  6 ++
 pcomplete.c  |  2 ++
 pcomplete.h  |  3 ++-
 7 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/builtins/complete.def b/builtins/complete.def
index 881c4711..89ce0726 100644
--- a/builtins/complete.def
+++ b/builtins/complete.def
@@ -141,6 +141,7 @@ static const struct _compopt {
   const char * const optname;
   unsigned long optflag;
 } compopts[] = {
+  { "allquote", COPT_ALLQUOTE },
   { "bashdefault", COPT_BASHDEFAULT },
   { "default",	COPT_DEFAULT },
   { "dirnames", COPT_DIRNAMES },
diff --git a/lib/readline/complete.c b/lib/readline/complete.c
index bf7cc856..3cf050fc 100644
--- a/lib/readline/complete.c
+++ b/lib/readline/complete.c
@@ -336,6 +336,14 @@ int rl_filename_completion_desired = 0;
entry finder function. */
 int rl_filename_quoting_desired = 1;
 
+/* Non-zero means that we should apply filename quoting to the matches
+   even if we are not otherwise treating the matches as filenames. This
+   is ALWAYS zero on entry, and can only be changed within a completion
+   entry finder function. */
+int rl_all_quoting_desired = 0;
+
+#define QUOTING_DESIRED (rl_all_quoting_desired || (rl_filename_completion_desired && rl_filename_quoting_desired))
+
 /* This function, if defined, is called by the completer when real
filename completion is done, after all the matching names have been
generated. It is passed a (char**) known as matches in the code below.
@@ -510,7 +518,7 @@ static void
 set_completion_defaults (int what_to_do)
 {
   /* Only the completion entry function can change these. */
-  rl_filename_completion_desired = 0;
+  rl_filename_completion_desired = rl_all_quoting_desired = 0;
   rl_filename_quoting_desired = 1;
   rl_completion_type = what_to_do;
   rl_completion_suppress_append = rl_completion_suppress_quote = 0;
@@ -1408,10 +1416,7 @@ compute_lcd_of_matches (char **match_list, int matches, const char *text)
 		   check against the list of matches
 		FI */
 	  dtext = (char *)NULL;
-	  if (rl_filename_completion_desired &&
-	  rl_filename_dequoting_function &&
-	  rl_completion_found_quote &&
-	  rl_filename_quoting_desired)
+	  if (QUOTING_DESIRED && rl_completion_found_quote && rl_filename_dequoting_function)
 	{
 	  dtext = (*rl_filename_dequoting_function) ((char *)text, rl_completion_quote_character);
 	  text = dtext;
@@ -1766,10 +1771,7 @@ make_quoted_replacement (char *match, int mtype, char *qc)
  matches don't require a quoted substring. */
   replacement = match;
 
-  should_quote = match && rl_completer_quote_characters &&
-			rl_filename_completion_desired &&
-			rl_filename_quoting_desired;
-
+  should_quote = match && rl_completer_quote

Prompt save/restore for vi movement argument

2023-04-15 Thread Grisha Levit
Originally identified in [1] (FWIW, I only see the message in the archive,
not in my inbox).

[1]: https://lists.gnu.org/archive/html/bug-bash/2023-04/msg00043.html

bash --norc -o vi -in <<<$'\ec1234567'
(arg: 123456) Segmentation fault

I think it's just a matter of save/restore_prompt



diff --git a/lib/readline/vi_mode.c b/lib/readline/vi_mode.c
index 7396e316..ddd8fd95 100644
--- a/lib/readline/vi_mode.c
+++ b/lib/readline/vi_mode.c
@@ -1090,6 +1090,7 @@ _rl_vi_arg_dispatch (int c)
 }
   else
 {
+ rl_restore_prompt ();
   rl_clear_message ();
   rl_stuff_char (key);
   return 0; /* done */
@@ -1331,6 +1332,7 @@ rl_domove_read_callback (_rl_vimotion_cxt *m)
   rl_numeric_arg = _rl_digit_value (c);
   rl_explicit_arg = 1;
   RL_SETSTATE (RL_STATE_NUMERICARG);
+ rl_save_prompt ();
   rl_digit_loop1 ();
   rl_numeric_arg *= save;
   c = rl_vi_domove_getchar (m);


segfault on initial word completion

2023-04-18 Thread Grisha Levit
$ bash --rcfile <(echo 'complete -C: -I') -i <<<$'; \cA\t'
bash-5.2$ bash: xmalloc: cannot allocate 18446744073709551615 bytes
$ bash --rcfile <(echo 'complete -C: -I') -i <<<$';\cA \t'
bash-5.2$  Segmentation fault: 11



SIGINT not breaking loop with non-list function body

2023-04-19 Thread Grisha Levit
If an interactive shell line is just a single loop and the body of the loop
consists only of a function name, and the function body is not a list, ^C
does not cause the loop to break as it otherwise would.

$ f() { cat; }
$ while :; do f; done
^C

The code in wait_for() checks the values of loop_level and executing_list:

jobs.c:5095:55
-> 5095 if (signal_is_trapped (SIGINT) == 0 && (loop_level ||
(shell_compatibility_level > 32 && executing_list)))
   5096 ADDINTERRUPT;

but loop_level is reset by execute_function():

execute_cmd.c:5171:5
   5170 if (shell_compatibility_level > 43)
-> 5171 loop_level = 0;


This code in wait_for seems to be the only place where the value of
executing_list is tested, so maybe changing that variable to something like
executing_list_or_loop and adding in/decrements of it in the loop-related
execute_* functions would make sense.


Re: segfault on initial word completion

2023-04-21 Thread Grisha Levit
On Wed, Apr 19, 2023, 10:23 Chet Ramey  wrote:

> On 4/18/23 6:15 PM, Grisha Levit wrote:
> > $ bash --rcfile <(echo 'complete -C: -I') -i <<<$'; \cA\t'
> > bash-5.2$ bash: xmalloc: cannot allocate 18446744073709551615 bytes
> > $ bash --rcfile <(echo 'complete -C: -I') -i <<<$';\cA \t'
> > bash-5.2$  Segmentation fault: 11
> >
>
> Thanks for the report.
>

One more issue after the fix, the start can end up being further than
pcomp_ind

bash --rcfile <(echo 'complete -C: -I') -i <<<$' \cA\t'

ERROR: AddressSanitizer: heap-buffer-overflow
READ of size 1 at 0x0044e7a7d22f thread T0
#0 0x63e8175148 in bind_compfunc_variables pcomplete.c:946:7
#1 0x63e816afb0 in gen_command_matches pcomplete.c:1179:3
#2 0x63e8165358 in gen_compspec_completions pcomplete.c:1389:18
#3 0x63e81700e0 in gen_progcomp_completions pcomplete.c:1539:9
#4 0x63e816eb7c in programmable_completions pcomplete.c:1592:13
#5 0x63e810b4b8 in attempt_shell_completion bashline.c:1745:26


heap-buffer-overflow in history_expand

2023-04-23 Thread Grisha Levit
The history expansion code can end up reading past the end of the
input line buffer if the line ends with an invalid multibyte sequence:

bash --norc -in <<<$'X\n\e238Y!!\xC2\xC2'
ERROR: AddressSanitizer: heap-buffer-overflow
READ of size 1 at 0x000108b48400 thread T0
#0 0x104ed9c88 in history_expand histexpand.c:1129
#1 0x104b761b0 in pre_process_line bashhist.c:570
#2 0x10482a540 in shell_getc parse.y:2512

diff --git a/lib/readline/histexpand.c b/lib/readline/histexpand.c
index db344b49..425ea7cf 100644
--- a/lib/readline/histexpand.c
+++ b/lib/readline/histexpand.c
@@ -1121,7 +1121,7 @@ history_expand (const char *hstring, char **output)

  c = tchar;
  memset (mb, 0, sizeof (mb));
- for (k = 0; k < MB_LEN_MAX; k++)
+ for (k = 0; k < MB_LEN_MAX && i < l; k++)
{
  mb[k] = (char)c;
  memset (&ps, 0, sizeof (mbstate_t));



heap-use-after-free in rl_do_undo

2023-04-25 Thread Grisha Levit
This segfaults in a non-ASAN build:

HISTFILE= INPUTRC=<(echo '"F": history-substring-search-forward') \
bash --norc -in <<<$'.\n..\n\cP\cT\cPF\cN\cN.\cPF\c_'

ERROR: AddressSanitizer: heap-use-after-free on address 0x0001060082a8
READ of size 4 at 0x0001060082a8 thread T0
#0 0x1027627b8 in rl_do_undo undo.c:188
#1 0x102764b38 in rl_undo_command undo.c:358
#2 0x102661904 in _rl_dispatch_subseq readline.c:922

0x0001060082a8 is located 24 bytes inside of 32-byte region
[0x000106008290,0x0001060082b0)
freed by thread T0 here:
#0 0x102f6afa4 in wrap_free+0x98
#1 0x1024c8648 in xfree xmalloc.c:140
#2 0x102761834 in _rl_free_undo_list undo.c:111
#3 0x10278fbcc in _rl_free_saved_history_line misc.c:404
#4 0x10269aed8 in rl_history_search_reinit search.c:637
#5 0x10269bec0 in rl_history_substr_search_forward search.c:688
#6 0x102661904 in _rl_dispatch_subseq readline.c:922

previously allocated by thread T0 here:
#0 0x102f6ae68 in wrap_malloc+0x94
#1 0x1024c84ec in xmalloc xmalloc.c:107
#2 0x102761088 in alloc_undo_entry undo.c:75
#3 0x102760f60 in rl_add_undo undo.c:92
#4 0x102779198 in rl_insert_text text.c:113
#5 0x102781710 in _rl_insert_char text.c:903
#6 0x102782664 in rl_insert text.c:955
#7 0x102661904 in _rl_dispatch_subseq readline.c:922



recovering from parser errors during compound assignment

2023-04-26 Thread Grisha Levit
If a parser error is encountered when `local' is parsing an array
assignment string, the installed unwind-protects never run:

$ f() { local -a a='(())'; }; f
bash: syntax error near unexpected token `('
$ declare -p a FUNCNAME
declare -a a
declare -a FUNCNAME=([0]="f")



If there's a PROMPT_COMMAND set and a parser error is encountered during a
compound assignment, the alias expansion state is not properly restored:

$ PROMPT_COMMAND=:; alias A=:
$ declare a=(())
bash: syntax error near unexpected token `('
$ A
bash: A: command not found


EOF at PS2

2023-04-26 Thread Grisha Levit
A few issues with EOF being received at PS2:


The setting of ignoreeof is ignored at PS2:

bash --norc -o ignoreeof
$ uname \
> ^D
Linux
(bash exits)


If the previous line didn't terminate the current token, ^D causes the
token (rather than the command) to be terminated and a new PS2 to be
printed. The history code treats them as one word.

bash --norc
$ echo A\
> ^D
> B
A B
$ !!
echo AB
AB


And perhaps most surprising, if PS0 contains a command substitution,
the wrong prompts are shown:

PS0='$(:)' bash --norc
$ uname \
> ^DLinux   # The command is executed (without a newline printed)
> uname \   # PS2 is displayed again, but we are actually reading a new cmd
$ -n# PS1 is used for the continuation line
localhost



Re: heap-use-after-free in rl_do_undo

2023-04-26 Thread Grisha Levit
On Wed, Apr 26, 2023 at 2:15 PM Chet Ramey  wrote:
> This one looks like it was already hit by the other guy sending in fuzzing 
> output.

You're right, sorry I missed that.



Re: heap-buffer-overflow in history_expand

2023-04-28 Thread Grisha Levit
On Fri, Apr 28, 2023, 11:35 Chet Ramey  wrote:

> On 4/24/23 1:40 AM, Grisha Levit wrote:
> > The history expansion code can end up reading past the end of the
> > input line buffer if the line ends with an invalid multibyte sequence:
>
> Thanks for the report. You mean an incomplete multibyte character, I think.
>

Well I'm not quite sure. The (piped) input needs to have an invalid
sequence (two leading bytes) but readline transforms this invalid sequence
into a just a single leading byte.

Piping input that simply ends in an leading byte doesn't trigger the issue
-- that byte byte don't seem to make it into the input line.

This is a bit off topic, but I don't really understand what happens with
invalid input sequences in the input, see e.g.:

$ bash --norc -i 2>/dev/null <<<$'printf %qn \240\340'
$'\240'
$ bash --norc -i 2>/dev/null <<<$'printf %qn \240\340.'
$'\240.'
$ bash --norc -i 2>/dev/null <<<$'printf %qn \240\340.\341'
$'\240.\340'

(Especially the last one where the 2nd and 3rd bytes of the string are
reversed)


Re: heap-buffer-overflow in history_expand

2023-04-30 Thread Grisha Levit
On Sat, Apr 29, 2023, 14:02 Chet Ramey  wrote:

> On 4/28/23 9:28 PM, Grisha Levit wrote:
> > Piping input that simply ends in an leading byte doesn't trigger the
> issue
> > -- that byte byte don't seem to make it into the input line.
> >
> > This is a bit off topic, but I don't really understand what happens with
> > invalid input sequences in the input, see e.g.:
>
> They should be treated as individual bytes.
>

I think I see what's happening now. Readline accumulates the bytes until a
complete character is read. However, this buffer is not flushed when the
reading of a multibyte character is interrupted by inserting a single byte
character, or by any non-insertion command.

So for example, the \317 byte never gets a chance to be inserted here:
bash --norc -in <<<$':\317:'
$ ::

And inserting the byte is deferred until the next byte with the 8th bit set
is read (which can be at some arbitrary future time) here:
bash --norc -in <<<$':\317\n: \200'
$ :
$ : π

You can also reproduce interactively by binding the above input to a macro.

Attached is a patch that I think should address this.
From d7d1043e2bc2ad35ac2ef6a2cc45467085cf5835 Mon Sep 17 00:00:00 2001
From: Grisha Levit 
Date: Fri, 28 Apr 2023 23:24:34 -0400
Subject: [PATCH] fix invalid mb insert

lib/readline/text.c
	- _rl_insert_char: return 1 when there are any pending bytes
	- _rl_insert_char: treat being called with a count of 0 as a
	  signal to insert any pending bytes read so far.
	- rl_insert, rl_quoted_insert: if the final _rl_insert_char
	  returns 1, call _rl_insert_char with a count of 0 to insert
	  any pending bytes.
---
 lib/readline/text.c | 68 +++--
 1 file changed, 54 insertions(+), 14 deletions(-)

diff --git a/lib/readline/text.c b/lib/readline/text.c
index 356cac5f..27d9292b 100644
--- a/lib/readline/text.c
+++ b/lib/readline/text.c
@@ -704,7 +704,8 @@ static mbstate_t ps = {0};
 
 /* Insert the character C at the current location, moving point forward.
If C introduces a multibyte sequence, we read the whole sequence and
-   then insert the multibyte char into the line buffer. */
+   then insert the multibyte char into the line buffer.
+   If COUNT is 0, just insert any accumulated multibyte sequence as is. */
 int
 _rl_insert_char (int count, int c)
 {
@@ -718,11 +719,32 @@ _rl_insert_char (int count, int c)
   static int stored_count = 0;
 #endif
 
+#if !defined (HANDLE_MULTIBYTE)
   if (count <= 0)
 return 0;
+#else
+  if (count < 0)
+return 0;
+  if (count == 0)
+{
+  /* If COUNT is 0, we were called because we are no longer reading
+ a multibyte character. If we were waiting to complete a character,
+just insert any bytes we have read so far. */
+  if (pending_bytes_length == 0)
+	return 0;
 
-#if defined (HANDLE_MULTIBYTE)
-  if (MB_CUR_MAX == 1 || rl_byte_oriented)
+  if (stored_count <= 0)
+	stored_count = count;
+  else
+	count = stored_count;
+
+  memcpy (incoming, pending_bytes, pending_bytes_length);
+  incoming[pending_bytes_length] = '\0';
+  incoming_length = pending_bytes_length;
+  pending_bytes_length = 0;
+  memset (&ps, 0, sizeof (mbstate_t));
+}
+  else if (MB_CUR_MAX == 1 || rl_byte_oriented)
 {
   incoming[0] = c;
   incoming[1] = '\0';
@@ -730,6 +752,9 @@ _rl_insert_char (int count, int c)
 }
   else if (_rl_utf8locale && (c & 0x80) == 0)
 {
+  if (pending_bytes_length)
+_rl_insert_char (0, 0);
+
   incoming[0] = c;
   incoming[1] = '\0';
   incoming_length = 1;
@@ -827,7 +852,11 @@ _rl_insert_char (int count, int c)
   rl_insert_text (string);
   xfree (string);
 
+#if defined (HANDLE_MULTIBYTE)
+  return pending_bytes_length ? 1 : 0;
+#else
   return 0;
+#endif
 }
 
   if (count > TEXT_COUNT_MAX)
@@ -860,6 +889,8 @@ _rl_insert_char (int count, int c)
   xfree (string);
   incoming_length = 0;
   stored_count = 0;
+
+  return pending_bytes_length ? 1 : 0;
 #else /* !HANDLE_MULTIBYTE */
   char str[TEXT_COUNT_MAX+1];
 
@@ -873,9 +904,9 @@ _rl_insert_char (int count, int c)
 	  rl_insert_text (str);
 	  count -= decreaser;
 	}
-#endif /* !HANDLE_MULTIBYTE */
 
   return 0;
+#endif /* !HANDLE_MULTIBYTE */
 }
 
   if (MB_CUR_MAX == 1 || rl_byte_oriented)
@@ -903,9 +934,12 @@ _rl_insert_char (int count, int c)
   rl_insert_text (incoming);
   stored_count = 0;
 }
-#endif
 
+  return pending_bytes_length ? 1 : 0;
+#else /* !HANDLE_MULTIBYTE */
   return 0;
+#endif /* !HANDLE_MULTIBYTE */
+
 }
 
 /* Overwrite the character at point (or next COUNT characters) with C.
@@ -955,7 +989,7 @@ rl_insert (int count, int c)
   r = (rl_insert_mode == RL_IM_INSERT) ? _rl_insert_char (count, c) : _rl_overwrite_char (count, c);
 
 

Re: nofork command substitution

2023-05-19 Thread Grisha Levit
This will be a very neat feature to have.  One thing I'm excited about
is the ability to cleanly perform assignments during prompt expansion
(e.g. to capture information during PS0 or PS4 expansion).

Something I suspect people will complain about is that this change
makes some polyglot zsh/bash scripts that were previously
syntactically valid no longer so.  A particularly prominent one is
git-completion.bash [1] distributed with git, which now reports syntax
errors when sourced due to the expansion on line 414:

   413if [[ -n ${ZSH_VERSION-} ]]; then
   414unset ${(M)${(k)parameters[@]}:#__gitcomp_builtin_*} 2>/dev/null
   415else
   416unset $(compgen -v __gitcomp_builtin_)
   417fi

Hopefully, there are not too many such scripts and they can just gain
some workarounds (like wrapping the zsh-only code in an eval).

[1]: 
https://github.com/git/git/blob/v2.40.1/contrib/completion/git-completion.bash#L414



Re: parsing command substitution inside parameter expansion in interactive shell

2023-05-23 Thread Grisha Levit
On Mon, Mar 20, 2023 at 4:59 PM Chet Ramey  wrote:
> Thanks for the report. This is the same thing as
>
> https://lists.gnu.org/archive/html/bug-bash/2021-06/msg00115.html
>
> with the command substitution being embedded in the parameter expansion.

One more similar case when the parameter expansion is quoted:

bash --norc -in <<<$'"${_+$("\n")}"'
bash-5.2$ "${_+$("
bash: command substitution: line 2: unexpected EOF while looking for
matching `"'
> ")}"

bash --norc -in <<<$'"${_+$(:\n)}"'
bash-5.2$ "${_+$(:
bash: command substitution: line 3: unexpected EOF while looking for
matching `)'
> )}"


Re: nofork command substitution

2023-05-23 Thread Grisha Levit
An interactive shell gets confused (PS1 shown instead of PS2) when using
newline as the first character of the substitution:

$ bash --norc -i <<<$'${\n:;}'
bash-5.2$ ${
bash-5.2$ :;}


Things don't seem to work right with an empty funsub:

bash --pretty-print <<<'${ }'
${ ; }

$ bash -c '${ }'
ERROR: AddressSanitizer: heap-buffer-overflow
READ of size 1 at 0x005af9470f1f thread T0
#0 0x56ec06e390 in parse_comsub parse.y:4462:15
#1 0x56ec058344 in read_token_word parse.y:5294:16
#2 0x56ec04c144 in read_token parse.y:3642:12

I'm not sure if it makes sense to accept `${ }' (like `$( )') or not (like
`{ }'). FWIW, mksh accepts both.


Unset during expansion w/ nofork sub

2023-05-24 Thread Grisha Levit
There are a lot of code paths that (reasonably) do not expect a variable to
disappear while being expanded/assigned to, and usually cause a segfault if
that happens (as is now possible with the new nofork command substitution).

I don't think there is any legitimate use case to be supported here, and I
can't really think of a sensible way to avoid this without adding a bunch
of overhead.

Hypothetically, bash could perform another variable lookup to confirm after
expanding the other words in the PE, but this might have to deal with
issues like array types changing when a new variable is brought into scope,
etc.

A similar situation is already ignored in mapfile's callback (which can
unset the array being populated) and it seems fine to me to ignore these
too, but posting in case others have better ideas.


a=(x); a=(${ unset a; })
heap-use-after-free arrayfunc.c:667 in assign_compound_array_list

a=(x); : ${a[${ unset a; }]}
heap-use-after-free arrayfunc.c:1499 in array_value_internal

a=(x); : ${#a[${ unset a; }]}
heap-use-after-free subst.c:7378 in array_length_reference

a=(x); : ${a[@]/${ unset a; }}
heap-use-after-free subst.c:9331 in parameter_brace_patsub

a=(x); : ${a[@]:${ unset a; }}
heap-use-after-free subst.c:8305:11 in verify_substring_values

a=(x); : ${a[@]:0:${ unset a; }}
heap-use-after-free subst.c:8964:16 in parameter_brace_substring

declare -A A; A=(x ${ unset A; })
heap-use-after-free variables.c:2859:36 in make_variable_value


Re: heap-buffer-overflow in history_expand

2023-05-25 Thread Grisha Levit
On Mon, May 1, 2023 at 11:48 AM Chet Ramey  wrote:
> Yes, I concluded the same thing. Thanks for the patch. I have one question
> about the change to rl_insert: why overwrite any return value from the
> initial call to _rl_insert_char by setting r back to 0? What if the initial
> value of C starts an incomplete multibyte character, and is then followed
> by a character that doesn't contribute?

You're right, that was a mistake, I missed the presence of that
initial _rl_insert_char call.

I noticed a couple of other bits missing from the patch as applied
though.  (The first because pending_bytes_length is not defined
without HANDLE_MULTIBYTE, the second to have quoted insert work
without a negative argument).

---

diff --git a/lib/readline/text.c b/lib/readline/text.c
index b07ff470..e3e5bb9e 100644
--- a/lib/readline/text.c
+++ b/lib/readline/text.c
@@ -853,7 +853,11 @@ _rl_insert_char (int count, int c)
   rl_insert_text (string);
   xfree (string);

+#if defined (HANDLE_MULTIBYTE)
   return (pending_bytes_length != 0);
+#else
+  return 0;
+#endif
 }

   if (count > TEXT_COUNT_MAX)
@@ -1112,6 +1116,8 @@ rl_quoted_insert (int count, int key)
r = _rl_insert_next (1);
   while (r == 0 && ++count < 0);
 }
+  else
+r = _rl_insert_next (count);

   if (r == 1)
 _rl_insert_char (0, 0);/* insert partial multibyte character */



\U expansion in single-byte locale

2023-05-26 Thread Grisha Levit
If expanding a \u (or \U) escape sequence fails, Bash replaces the
input escape sequence with a newly generated one:

$ LC_ALL=C printf %b \\U80
\u0080

Since this new sequence may by longer than the input, it can cause an
overflow in printf:

$ bash-asan -c 'LC_ALL=C printf %b \\U80'
ERROR: AddressSanitizer: heap-buffer-overflow ...
WRITE of size 1 at 0x0001062028f5 thread T0
#0 0x1031bed94 in bexpand printf.def:1180
#1 0x1031ad19c in printf_builtin printf.def:610

TBH I found this fallback behavior kind of surprising and was
expecting the escape sequence to expand to either a null string or to
itself (though some tools assume __STDC_ISO_10646__ behavior even
without checking and expand to what may (or may not) be the correct
codepoint).



ctype.h functions on bytes 0x80..0xFF

2023-05-26 Thread Grisha Levit
On Mon, May 1, 2023 at 11:48 AM Chet Ramey  wrote:
>
> (And once we get these issues straightened out, if you look back to your
> original example, 0x240 is a blank in my locale, en_US.UTF-8, and will be
> removed from the input stream by the parser unless it's quoted.)

On at least recent macos versions, it seems that the ctype.h functions
treat [0x80..0xFF] the same as wctype.h functions would.  So while
U+00A0 is a space character in the en_US.UTF-8 locale, and
iswspace(L'\u00A0') returns 1, it is also the case that isspace(0xA0)
returns 1.  But I don't think it's correct to actually rely on the
latter since the single byte 0xA0 doesn't represent _any_ character in
the locale, much less a space.

(I think that's the reason for the behavior Chet noted above from a
previous thread).

For example, these outputs would be correct with \uA0 in place of \xA0
below, but I don't think the current behaviour is expected:

$ eval $'printf "<%s>" [\xA0\xA0]'
<[><]>

[[ $'\xA0' == [[:space:]] ]]; echo $?
0

Perhaps on platforms like this it would be appropriate to mask ctype
results with something equivalent to `btowc(c) != WEOF'?

(See http://www.openradar.me/FB9973780 for an example of the issue in
an apple-supplied program)



Re: ctype.h functions on bytes 0x80..0xFF

2023-05-27 Thread Grisha Levit
The below seems like a cheap fix for UTF-8 locales. Since Bash falls
back to using the single-byte glob matching functions when presented
with invalid multibyte strings, this patch makes the glob code avoid
calling the ctype functions or strcoll when handling individual bytes
>0x7F (in a UTF-8 locale).

This makes the following no longer evaluate to true on macos:

[[ $'\xC0' == [[:upper:]] ]]
[[ $'\xC0' == [[=A=]] ]]
[[ $'\xC0' == $'\xE0' ]]  # with nocasematch

And on Linux with glibc (tested on Ubuntu 22.04) in en_US.UTF-8,
strcoll returns 0 for any two invalid bytes, so the following is also
no longer true:

x=$'\x80'; [[ $'\xC0' == [[=$x=]] ]]

The locale_setblanks change is for the macos issue with 0xA0 being
treated as a blank (as U+00A0).  There's no other code that changes
CSHBRK in sh_syntaxtab so I think the simplifications are OK.

---
diff --git a/lib/glob/smatch.c b/lib/glob/smatch.c
index 12eb9d27..1c6b0229 100644
--- a/lib/glob/smatch.c
+++ b/lib/glob/smatch.c
@@ -141,6 +141,9 @@ rangecmp (int c1, int c2, int forcecoll)
 static int
 collseqcmp (int c, int equiv)
 {
+  if (locale_utf8locale && (UTF8_SINGLEBYTE (c) == 0 ||
UTF8_SINGLEBYTE (equiv) == 0))
+return (c == equiv);
+
   if (charcmp (c, equiv, 1) == 0)
 return 1;

@@ -281,6 +284,9 @@ is_cclass (int c, const char *name)
   enum char_class char_class;
   int result;

+  if (locale_utf8locale && UTF8_SINGLEBYTE(c) == 0)
+return -1;
+
   char_class = is_valid_cclass (name);
   if (char_class == CC_NO_CLASS)
 return -1;
@@ -291,7 +297,8 @@ is_cclass (int c, const char *name)

 /* Now include `sm_loop.c' for single-byte characters. */
 /* The result of FOLD is an `unsigned char' */
-# define FOLD(c) ((flags & FNM_CASEFOLD) \
+# define FOLD(c) (((flags & FNM_CASEFOLD) && \
+(locale_utf8locale == 0 || UTF8_SINGLEBYTE (c))) \
  ? TOLOWER ((unsigned char)c) \
  : ((unsigned char)c))

diff --git a/locale.c b/locale.c
index eb24a517..b918db37 100644
--- a/locale.c
+++ b/locale.c
@@ -584,15 +584,10 @@ locale_setblanks (void)

   for (x = 0; x < sh_syntabsiz; x++)
 {
-  if (isblank ((unsigned char)x))
- sh_syntaxtab[x] |= CSHBRK|CBLANK;
-  else if (member (x, shell_break_chars))
- {
-   sh_syntaxtab[x] |= CSHBRK;
-   sh_syntaxtab[x] &= ~CBLANK;
- }
+  if ((locale_utf8locale == 0 || (x & 0x80) == 0) && isblank
((unsigned char)x))
+ sh_syntaxtab[x] |= CBLANK;
   else
- sh_syntaxtab[x] &= ~(CSHBRK|CBLANK);
+ sh_syntaxtab[x] &= ~CBLANK;
 }
 }



No form commsub if last token ends with & or ;

2023-05-27 Thread Grisha Levit
Missing final `;':

"$BASH" --pretty-print <<< $'${ : \;;}'
${ : \; }

"$BASH" --pretty-print <<< $'${ : \;\n}'
${ : \; }

"$BASH" --pretty-print <<< $'${ : \&;}'
${ : \& }

"$BASH" --pretty-print <<< $'${ : \&\n}'
${ : \& }

Correct:

"$BASH" --pretty-print <<< $'${ : \;&}'
${ : \; & }

"$BASH" --pretty-print <<< $'${ : \&&}'
${ : \& & }


[PATCH] leak in rl_filename_completion_function

2023-05-31 Thread Grisha Levit
If rl_filename_rewrite_hook returns a new string for a filename (which
I guess only happens on macOS with bash), it is in most cases not
free-d.

run() {
for ((i=0; i<=1; i++)); do
((i%1000)) || ps -o rss= $BASHPID
compgen -f . >/dev/null
done
}
$ (run)
  2160
  4576
  6864
  9040
 11232
 13424
 15584
 17712
 19856
 22000
 24144

---
diff --git a/lib/readline/complete.c b/lib/readline/complete.c
index bf7cc856..99556a35 100644
--- a/lib/readline/complete.c
+++ b/lib/readline/complete.c
@@ -2521,6 +2521,9 @@ rl_filename_completion_function (const char
*text, int state)
   convfn = dentry = 0;
   while (directory && (entry = readdir (directory)))
 {
+  if (convfn != dentry)
+xfree (convfn);
+
   convfn = dentry = entry->d_name;
   convlen = dentlen = D_NAMLEN (entry);

@@ -2572,6 +2575,9 @@ rl_filename_completion_function (const char
*text, int state)
  users_dirname = (char *)NULL;
}

+  if (convfn != dentry)
+xfree (convfn);
+
   return (char *)NULL;
 }
   else



[PATCH] use-after-free in expand_string_dollar_quote

2023-06-01 Thread Grisha Levit
A use-after-free happens in expand_string_dollar_quote if
noexpand_translation is enabled and a string's translation is the same
length as the string itself.
---
diff --git a/subst.c b/subst.c
index 08d9285e..a7a386d4 100644
--- a/subst.c
+++ b/subst.c
@@ -4231,12 +4231,17 @@ expand_string_dollar_quote (const char
*string, int flags)
  continue;
}
  trans = locale_expand (t, 0, news-sindex, 0, &translen);
- free (t);
  if (singlequote_translations &&
((news-sindex-1) != translen || STREQN (t, trans,
translen) == 0))
-   t = sh_single_quote (trans);
+   {
+ free (t);
+ t = sh_single_quote (trans);
+   }
  else
-   t = sh_mkdoublequoted (trans, translen, 0);
+   {
+ free (t);
+ t = sh_mkdoublequoted (trans, translen, 0);
+   }
  sindex = news;
}
 #endif /* TRANSLATABLE_STRINGS */



bug-bash@gnu.org

2023-06-10 Thread Grisha Levit
The command printing code can fail to add a required semicolon when the
last word in the command ends with `&'

$ bash --pretty-print <<<$'{ \&;}'
{ \& }

$ f() { if echo \&; then :; fi; }
$ declare -f f
f ()
{
if echo \& then
:;
fi
}
$ eval "$(declare -f f)"
bash: syntax error near unexpected token `fi'
---
diff --git a/print_cmd.c b/print_cmd.c
index 29870837..961c8dae 100644
--- a/print_cmd.c
+++ b/print_cmd.c
@@ -1446,8 +1446,10 @@ indent (int amount)
 static void
 semicolon (void)
 {
-  if (command_string_index > 0 &&
-   (the_printed_command[command_string_index - 1] == '&' ||
+  if ((command_string_index > 1 &&
+   the_printed_command[command_string_index - 2] == ' ' &&
+   the_printed_command[command_string_index - 1] == '&') ||
+  (command_string_index > 0 &&
the_printed_command[command_string_index - 1] == '\n'))
 return;
   cprintf (";")


[PATCH] fix compgen -V leak

2023-06-16 Thread Grisha Levit
My earlier patch for adding compgen -V did the variable assignment in a
pretty silly way and had a small memory leak to boot. Hope this new way
makes sense, sorry for the extra work.
---
>From b6b13b89e1436ddd575483a81e79ef43d82a5c0c Mon Sep 17 00:00:00 2001
From: Grisha Levit 
Date: Sat, 3 Jun 2023 16:37:02 -0400
Subject: [PATCH] fixup compgen -V array handling

* builtins/complete.def
- compgen_builtin: skip the unnecessary WORD_LIST conversion, avoid
  small leak

* externs.h
- remove strlist_to_word_list which was only added for initial version
  of compgen -V support
---
 builtins/complete.def | 7 +--
 externs.h | 1 -
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/builtins/complete.def b/builtins/complete.def
index 890cf20d..bb03c6e9 100644
--- a/builtins/complete.def
+++ b/builtins/complete.def
@@ -678,7 +678,6 @@ compgen_builtin (WORD_LIST *list)
   int old_ind, old_completion, old_quoting, old_suppress;
   SHELL_VAR *var;
   char *varname;
-  WORD_LIST *alist;

   if (list == 0)
 return (EXECUTION_SUCCESS);
@@ -763,11 +762,7 @@ compgen_builtin (WORD_LIST *list)
   var = builtin_find_indexed_array (varname, 1);
   if (var && sl && sl->list && sl->list_len)
  {
-   alist = strlist_to_word_list (sl, 0, 0);
-   assign_array_var_from_word_list (var, alist, 0);
-   free (sl);
-   sl = (STRINGLIST *)NULL;
-   dispose_words (alist);
+   array_from_argv (array_cell(var), sl->list, sl->list_len);
rval = EXECUTION_SUCCESS;
  }
 }
diff --git a/externs.h b/externs.h
index a1363d4d..fe5aa492 100644
--- a/externs.h
+++ b/externs.h
@@ -414,7 +414,6 @@ extern STRINGLIST *strlist_prefix_suffix (STRINGLIST *,
const char *, const char
 extern void strlist_print (STRINGLIST *, const char *);
 extern void strlist_walk (STRINGLIST *, sh_strlist_map_func_t *);
 extern void strlist_sort (STRINGLIST *);
-extern WORD_LIST *strlist_to_word_list (STRINGLIST *, int, int);

 /* declarations for functions defined in lib/sh/stringvec.c */

-- 
2.41.0


[PATCH] uninitialized variable access

2023-06-16 Thread Grisha Levit
Some uninitialized variable access identified by clang's static analyzer.
(FWIW 90% of the reports were bogus but these seem legit)

* lib/readline/input.c
- rl_gather_tyi: the `result' variable is no longer initialized before
first access since commit d0bc56a32

* lib/readline/kill.c
- _rl_read_bracketed_paste_prefix: if there's no further input after the
initial \e of the bracketed paste prefix, an uninitialized value of `key'
can get used in the return value

* subst.c
- function_substitute: seems like the `tflag' assignment ended up in the
wrong place?

* builtins/read.def
- read_builtin: with `read -n0', the initialization of saw_escape is goto-d
over but that variable is later accessed

---
diff --git a/lib/readline/input.c b/lib/readline/input.c
index 229474ff..00605834 100644
--- a/lib/readline/input.c
+++ b/lib/readline/input.c
@@ -252,6 +252,7 @@ rl_gather_tyi (void)
   chars_avail = 0;
   input = 0;
   tty = fileno (rl_instream);
+  result = -1;

   /* Move this up here to give it first shot, but it can't set chars_avail
*/
   /* XXX - need rl_chars_available_hook? */
diff --git a/lib/readline/kill.c b/lib/readline/kill.c
index 1dfe3c57..1f13e447 100644
--- a/lib/readline/kill.c
+++ b/lib/readline/kill.c
@@ -779,7 +779,7 @@ _rl_read_bracketed_paste_prefix (int c)
   pbpref = BRACK_PASTE_PREF; /* XXX - debugging */
   if (c != pbpref[0])
 return (0);
-  pbuf[ind = 0] = c;
+  pbuf[ind = 0] = key = c;
   while (ind < BRACK_PASTE_SLEN-1 &&
  (RL_ISSTATE (RL_STATE_INPUTPENDING|RL_STATE_MACROINPUT) == 0) &&
  _rl_pushed_input_available () == 0 &&
diff --git a/subst.c b/subst.c
index 08d9285e..e69e0e5b 100644
--- a/subst.c
+++ b/subst.c
@@ -7021,7 +7021,6 @@ function_substitute (char *string, int quoted, int
flags)
   /* We call anonclose as part of the outer nofork unwind-protects */
   BLOCK_SIGNAL (SIGINT, set, oset);
   lseek (afd, 0, SEEK_SET);
-  tflag = 0;
   istring = read_comsub (afd, quoted, flags, &tflag);
   UNBLOCK_SIGNAL (oset);
 }
@@ -7029,6 +7028,7 @@ function_substitute (char *string, int quoted, int
flags)
 {
   s = get_string_value ("REPLY");
   istring = s ? comsub_quote_string (s, quoted, flags) : savestring
("");
+  tflag = 0;
 }

   run_unwind_frame ("nofork comsub");  /* restores stdout, job control
stuff */

diff --git a/builtins/read.def b/builtins/read.def
index cb4e1e59..80d1241d 100644
--- a/builtins/read.def
+++ b/builtins/read.def
@@ -403,6 +403,9 @@ read_builtin (WORD_LIST *list)
   input_string = (char *)xmalloc (size = 112); /* XXX was 128 */
   input_string[0] = '\0';

+  pass_next = 0;   /* Non-zero signifies last char was backslash. */
+  saw_escape = 0;  /* Non-zero signifies that we saw an escape char */
+
   /* More input and options validation */
   if (nflag == 1 && nchars == 0)
 {
@@ -463,9 +466,6 @@ read_builtin (WORD_LIST *list)
 add_unwind_protect (xfree, rlbuf);
 #endif

-  pass_next = 0;   /* Non-zero signifies last char was backslash. */
-  saw_escape = 0;  /* Non-zero signifies that we saw an escape char */
-
   if (tmsec > 0 || tmusec > 0)
 {
   /* Turn off the timeout if stdin is a regular file (e.g. from


[PATCH] null pointer deref in bindpwd

2023-06-16 Thread Grisha Levit
Only triggered by doing something stupid:

bash -c 'declare -n OLDPWD=X[SHLVL=-1]; /; cd /'
bash: line 1: X[SHLVL=-1]: bad array subscript
Segmentation fault: 11

---
diff --git a/builtins/cd.def b/builtins/cd.def
index de123f8b..e3156463 100644
--- a/builtins/cd.def
+++ b/builtins/cd.def
@@ -158,10 +158,9 @@ bindpwd (int no_symlinks)
   pwdvar = get_string_value ("PWD");

   tvar = bind_variable ("OLDPWD", pwdvar, 0);
-  if (tvar && readonly_p (tvar))
+  if (tvar == 0 || readonly_p (tvar))
 r = EXECUTION_FAILURE;
-
-  if (old_anm == 0 && array_needs_making && exported_p (tvar))
+  else if (old_anm == 0 && array_needs_making && exported_p (tvar))
 {
   update_export_env_inplace ("OLDPWD=", 7, pwdvar);
   array_needs_making = 0;


Various small leaks

2023-06-17 Thread Grisha Levit
Found mostly by normal usage running a no-bash-malloc build with clang's
LeakSanitizer enabled. So far seems to provide very accurate results with
little overhead.

* arrayfunc.c
- quote_compound_array_word: make sure to free VALUE
- bind_assoc_var_internal: if assigning to a dynamic variable, make sure
  to free the key (usually assoc_insert would do it)

* bashline.c
- bash_command_name_stat_hook: free original *NAME if we are going to
  change what it points to (what the callers seem to expect)

* builtins/evalstring.c
- parse_and_execute: make sure to dispose of the parsed command
  resulting from a failed function import attempt
- open_redir_file: if we did not get a pointer to pass back the expanded
  filename, make sure to free the name

* examples/loadables/kv.c
- kvsplit: bind_assoc_variable does not free its VALUE argument so do
  not pass a copy in the call calling

* examples/loadables/stat.c
- loadstat: bind_assoc_variable does not free its VALUE argument so make
  sure to do it

* subst.c
- param_expand: free temp1 value for codepaths that don't do it
---
 arrayfunc.c   | 6 +-
 bashline.c| 1 +
 builtins/evalstring.c | 4 
 examples/loadables/kv.c   | 2 +-
 examples/loadables/stat.c | 1 +
 subst.c   | 2 ++
 6 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arrayfunc.c b/arrayfunc.c
index 28aee54b..ea7a559d 100644
--- a/arrayfunc.c
+++ b/arrayfunc.c
@@ -193,7 +193,10 @@ bind_assoc_var_internal (SHELL_VAR *entry, HASH_TABLE
*hash, char *key, const ch
   newval = make_array_variable_value (entry, 0, key, value, flags);

   if (entry->assign_func)
-(*entry->assign_func) (entry, newval, 0, key);
+{
+  (*entry->assign_func) (entry, newval, 0, key);
+  FREE (key);
+}
   else
 assoc_insert (hash, key, newval);

@@ -945,6 +948,7 @@ quote_compound_array_word (char *w, int type)
   if (t != w+ind)
free (t);
   strcpy (nword + i, value);
+  free (value);

   return nword;
 }
diff --git a/bashline.c b/bashline.c
index 6adc548e..c14d465f 100644
--- a/bashline.c
+++ b/bashline.c
@@ -1923,6 +1923,7 @@ bash_command_name_stat_hook (char **name)
   result = search_for_command (cname, 0);
   if (result)
 {
+  FREE (*name);
   *name = result;
   return 1;
 }
diff --git a/builtins/evalstring.c b/builtins/evalstring.c
index 11785de1..f4f62b2d 100644
--- a/builtins/evalstring.c
+++ b/builtins/evalstring.c
@@ -468,6 +468,8 @@ parse_and_execute (char *string, const char *from_file,
int flags)
should_jump_to_top_level = 0;
last_result = last_command_exit_value = EX_BADUSAGE;
set_pipestatus_from_exit (last_command_exit_value);
+   dispose_command(command);
+   global_command = (COMMAND *)NULL;
reset_parser ();
break;
  }
@@ -766,6 +768,8 @@ open_redir_file (REDIRECT *r, char **fnp)

   if (fnp)
 *fnp = fn;
+  else
+free (fn);
   return fd;
 }

diff --git a/examples/loadables/kv.c b/examples/loadables/kv.c
index 1dfceb6a..ee6dc7f0 100644
--- a/examples/loadables/kv.c
+++ b/examples/loadables/kv.c
@@ -71,7 +71,7 @@ kvsplit (SHELL_VAR *v, char *line, char *dstring)
   else
 value = "";

-  return (bind_assoc_variable (v, name_cell (v), savestring (key),
savestring (value), 0) != 0);
+  return (bind_assoc_variable (v, name_cell (v), savestring (key), value,
0) != 0);
 }

 int
diff --git a/examples/loadables/stat.c b/examples/loadables/stat.c
index 1093f7b0..d58f07e5 100644
--- a/examples/loadables/stat.c
+++ b/examples/loadables/stat.c
@@ -330,6 +330,7 @@ loadstat (char *vname, SHELL_VAR *var, char *fname, int
flags, char *fmt, struct
   key = savestring (arraysubs[i]);
   value = statval (i, fname, flags, fmt, sp);
   v = bind_assoc_variable (var, vname, key, value, ASS_FORCE);
+  free (value);
 }
   return 0;
 }
diff --git a/subst.c b/subst.c
index b93374b8..0ecb4cf1 100644
--- a/subst.c
+++ b/subst.c
@@ -10850,6 +10850,7 @@ comsub:
  {
chk_atstar (temp, quoted, pflags, quoted_dollar_at_p,
contains_dollar_at);
tdesc = parameter_brace_expand_word (temp, SPECIAL_VAR (temp, 0),
quoted, pflags, 0);
+   free (temp1);
if (tdesc == &expand_wdesc_error || tdesc == &expand_wdesc_fatal)
  return (tdesc);
ret = tdesc;
@@ -10862,6 +10863,7 @@ comsub:
  {
set_exit_status (EXECUTION_FAILURE);
report_error (_("%s: invalid variable name for name reference"),
temp);
+   free (temp1);
return (&expand_wdesc_error); /* XXX */
  }
else
From 892cb679195298496a8fc86c36825b20b0a8e07c Mon Sep 17 00:00:00 2001
From: Grisha Levit 
Date: Sat, 3 Jun 2023 16:51:26 -0400
Subject: [PATCH] various leaks

Found mostly by normal usage running a no-bash-malloc build with clang's
LeakSanitizer enabled. So far seems to provide very accurate results.

* arrayfunc.c
- quote_compound_array_word: make sure to free VALUE
- bind_asso

[PATCH] completion display interrupt leak handling

2023-06-17 Thread Grisha Levit
When readline's display of completion matches is interrupted, the match
list is only freed for the '?' completion type but not for '!' and '@'
types.  I couldn't find a specific reason for that to be the case so
hope this patch is an OK fix.

Also add freeing saved_line_buffer from rl_complete_internal to the
signal handler and freeing of matches in one spot where menu completion
doesn't do it.


* lib/readline/complete.c
- complete_sigcleanarg_t: new struct to hold _rl_complete_sigcleanup
  argument
- _rl_complete_sigcleanup: use new struct instead of *matches pointer,
  now also handles freeing saved_line
- rl_complete_internal: set up _rl_sigcleanup whenever display_matches
  is called so we no longer leak matches and saved_line_buffer when
  getting a SIGINT
- rl_menu_complete: free contents of matches instead of just pointer
  when completion-query-items exceeded
---
 lib/readline/complete.c | 61 ++---
 1 file changed, 39 insertions(+), 22 deletions(-)

diff --git a/lib/readline/complete.c b/lib/readline/complete.c
index bf7cc856..f8f08aa6 100644
--- a/lib/readline/complete.c
+++ b/lib/readline/complete.c
@@ -494,12 +494,19 @@ _rl_reset_completion_state (void)
   rl_completion_quote_character = 0;
 }

+typedef struct {
+char **matches;
+char *saved_line;
+} complete_sigcleanarg_t;
+
 static void
 _rl_complete_sigcleanup (int sig, void *ptr)
 {
+  complete_sigcleanarg_t *arg = ptr;
   if (sig == SIGINT) /* XXX - for now */
 {
-  _rl_free_match_list ((char **)ptr);
+  _rl_free_match_list (arg->matches);
+  FREE (arg->saved_line);
   _rl_complete_display_matches_interrupt = 1;
 }
 }
@@ -2008,7 +2015,8 @@ rl_complete_internal (int what_to_do)
   int start, end, delimiter, found_quote, i, nontrivial_lcd;
   char *text, *saved_line_buffer;
   char quote_char;
-  int tlen, mlen, saved_last_completion_failed;
+  int tlen, mlen, saved_last_completion_failed, do_display;
+  complete_sigcleanarg_t cleanarg;

   RL_SETSTATE(RL_STATE_COMPLETING);

@@ -2088,6 +2096,8 @@ rl_complete_internal (int what_to_do)
   if (matches && matches[0] && *matches[0])
 last_completion_failed = 0;

+  do_display = 0;
+
   switch (what_to_do)
 {
 case TAB:
@@ -2121,13 +2131,13 @@ rl_complete_internal (int what_to_do)
  {
if (what_to_do == '!')
  {
-   display_matches (matches);
+   do_display = 1;
break;
  }
else if (what_to_do == '@')
  {
if (nontrivial_lcd == 0)
- display_matches (matches);
+ do_display = 1;
break;
  }
else if (rl_editing_mode != vi_mode)
@@ -2151,23 +2161,7 @@ rl_complete_internal (int what_to_do)
append_to_match (matches[0], delimiter, quote_char, nontrivial_lcd);
break;
  }
-
-  if (rl_completion_display_matches_hook == 0)
- {
-   _rl_sigcleanup = _rl_complete_sigcleanup;
-   _rl_sigcleanarg = matches;
-   _rl_complete_display_matches_interrupt = 0;
- }
-  display_matches (matches);
-  if (_rl_complete_display_matches_interrupt)
-{
-  matches = 0; /* already freed by rl_complete_sigcleanup */
-  _rl_complete_display_matches_interrupt = 0;
-   if (rl_signal_event_hook)
- (*rl_signal_event_hook) (); /* XXX */
-}
-  _rl_sigcleanup = 0;
-  _rl_sigcleanarg = 0;
+  do_display = 1;
   break;

 default:
@@ -2180,6 +2174,29 @@ rl_complete_internal (int what_to_do)
   return 1;
 }

+  if (do_display)
+{
+  if (rl_completion_display_matches_hook == 0)
+ {
+   _rl_sigcleanup = _rl_complete_sigcleanup;
+   cleanarg.matches = matches;
+   cleanarg.saved_line = saved_line_buffer;
+   _rl_sigcleanarg = &cleanarg;
+   _rl_complete_display_matches_interrupt = 0;
+ }
+  display_matches (matches);
+  if (_rl_complete_display_matches_interrupt)
+ {
+   matches = 0; /* already freed by rl_complete_sigcleanup */
+   saved_line_buffer = 0;
+   _rl_complete_display_matches_interrupt = 0;
+   if (rl_signal_event_hook)
+ (*rl_signal_event_hook) (); /* XXX */
+ }
+  _rl_sigcleanup = 0;
+  _rl_sigcleanarg = 0;
+}
+
   _rl_free_match_list (matches);

   /* Check to see if the line has changed through all of this
manipulation. */
@@ -2864,7 +2881,7 @@ rl_menu_complete (int count, int ignore)
if (rl_completion_query_items > 0 && match_list_size >=
rl_completion_query_items)
  {
rl_ding ();
-   FREE (matches);
+   _rl_free_match_list (matches);
matches = (char **)0;
full_completion = 1;
return (0);
From 8eef36cd74caae425e536ead84b1a8cb1cad44b7 Mon Sep 17 00:00:00 2001
From: Grisha Levit 
Date: Sat, 3 Jun 2023 23:31:16 -0400
Subject: [PATCH] completion display interrupt leak handling

When readline's display of completion matches is interrupted, the match
list is only freed for the '?' completion type but not for '!&

Re: No form commsub if last token ends with & or ;

2023-06-18 Thread Grisha Levit
On Sunday, May 28, 2023, Grisha Levit  wrote:

> Missing final `;':
>
> "$BASH" --pretty-print <<< $'${ : \;;}'
> ${ : \; }
>

The latest set of fixes to this code solves these cases but others  have
issues:

$ bash --pretty-print <<<$'${ : \;;\n}'
${ : \; }
$ bash --pretty-print <<<$'${ : \;\n\n}'
${ : \; }
$ bash --pretty-print <<<$'${ : \&;\n}'
${ : \& }
$ bash --pretty-print <<<$'${ : \&;\n\n}'
${ : \& }

I think it might be ok to just have print_comsub handle adding the final
semicolon when needed instead of trying to add it after the fact in
parse_comsub?

(Patch below includes earlier fixes to semicolon() from
https://lists.gnu.org/archive/html/bug-bash/2023-06/msg00018.html)
---
diff --git a/externs.h b/externs.h
index a1363d4d..b1eb53dc 100644
--- a/externs.h
+++ b/externs.h
@@ -36,7 +36,7 @@ extern intmax_t evalexp (const char *, int, int *);
 #define FUNC_EXTERNAL  0x02

 extern char *make_command_string (COMMAND *);
-extern char *print_comsub (COMMAND *);
+extern char *print_comsub (COMMAND *, int);
 extern char *named_function_string (char *, COMMAND *, int);

 extern void print_command (COMMAND *);
diff --git a/parse.y b/parse.y
index c139a4d7..150bba65 100644
--- a/parse.y
+++ b/parse.y
@@ -4297,7 +4297,7 @@ static char *
 parse_comsub (int qc, int open, int close, size_t *lenp, int flags)
 {
   int peekc, r;
-  int start_lineno, dolbrace_spec, local_extglob, was_extpat, was_word;
+  int start_lineno, dolbrace_spec, local_extglob, was_extpat;
   char *ret, *tcmd;
   size_t retlen;
   sh_parser_state_t ps;
@@ -4342,7 +4342,6 @@ parse_comsub (int qc, int open, int close, size_t
*lenp, int flags)
   save_parser_state (&ps);

   was_extpat = (parser_state & PST_EXTPAT);
-  was_word = 0;

   /* State flags we don't want to persist into command substitutions. */
   parser_state &=
~(PST_REGEXP|PST_EXTPAT|PST_CONDCMD|PST_CONDEXPR|PST_COMPASSIGN);
@@ -4388,14 +4387,6 @@ parse_comsub (int qc, int open, int close, size_t
*lenp, int flags)

   r = yyparse ();

-  if (open == '{')
-{
-  if (current_token == shell_eof_token &&
-   (last_read_token == ';' || last_read_token == '\n') &&
-   (token_before_that == WORD || token_before_that ==
ASSIGNMENT_WORD))
-   was_word = 1;
-}
-
   if (need_here_doc > 0)
 {
   internal_warning ("command substitution: %d unterminated
here-document%s", need_here_doc, (need_here_doc == 1) ? "" : "s");
@@ -4459,7 +4450,7 @@ INTERNAL_DEBUG(("current_token (%d) !=
shell_eof_token (%c)", current_token, she
   restore_parser_state (&ps);
   pushed_string_list = saved_strings;

-  tcmd = print_comsub (parsed_command);/* returns static
memory */
+  tcmd = print_comsub (parsed_command, open);  /* returns static
memory */
   retlen = strlen (tcmd);
   if (open == '(') /* ) */
 {
@@ -4481,17 +4472,10 @@ INTERNAL_DEBUG(("current_token (%d) !=
shell_eof_token (%c)", current_token, she
 }
   else /* open == '{' } */
 {
-  int lastc;
-
-  lastc = tcmd[retlen - 1];
   retlen++;
-  ret = xmalloc (retlen + 4);
+  ret = xmalloc (retlen + 3);
   ret[0] = (dolbrace_spec == '|') ? '|' : ' ';
-  strcpy (ret + 1, tcmd);  /* ( */
-  if (was_word)
-   ret[retlen++] = ';';
-  else if (lastc != '\n' && lastc != ';' && lastc != '&')
-   ret[retlen++] = ';';
+  strcpy (ret + 1, tcmd);
   ret[retlen++] = ' ';
 }
   ret[retlen++] = close;
diff --git a/print_cmd.c b/print_cmd.c
index 29870837..0a403c6f 100644
--- a/print_cmd.c
+++ b/print_cmd.c
@@ -161,12 +161,14 @@ make_command_string (COMMAND *command)
back into an external representation without turning newlines into `;'.
Placeholder for other changes, if any are necessary. */
 char *
-print_comsub (COMMAND *command)
+print_comsub (COMMAND *command, int open)
 {
   char *ret;

   printing_comsub++;
   ret = make_command_string (command);
+  if (open == '{')
+semicolon();
   printing_comsub--;
   return ret;
 }
@@ -1446,9 +1448,11 @@ indent (int amount)
 static void
 semicolon (void)
 {
-  if (command_string_index > 0 &&
-   (the_printed_command[command_string_index - 1] == '&' ||
-   the_printed_command[command_string_index - 1] == '\n'))
+  if ((command_string_index > 1 &&
+   (the_printed_command[command_string_index - 2] == ' ' &&
+the_printed_command[command_string_index - 1] == '&')) ||
+  (command_string_index > 0 &&
+   (the_printed_command[command_string_index - 1] == '\n')))
 return;
   cprintf (";");
 }


Re: Various small leaks

2023-06-21 Thread Grisha Levit
On Wed, Jun 21, 2023, 10:08 Chet Ramey  wrote:
>
> On 6/17/23 4:02 AM, Grisha Levit wrote:
> > Found mostly by normal usage running a no-bash-malloc build with clang's
> > LeakSanitizer enabled.
>
> Are you running this on macOS or some other system?

I actually couldn't figure out how to get useful results from
LeakSanitizer on macOS.  It's disabled in Apple's build of clang and
though it's enabled in homebrew's it seemed to be producing bogus
results with unusable traces.

So I was testing on Linux, configured with:

~/src/bash/configure -C CC=clang CFLAGS='-g -O0 -fsanitize=leak
-fno-common -fno-omit-frame-pointer -fno-optimize-sibling-calls
-Wno-parentheses -Wno-format-security' --without-bash-malloc

This produces rather useful reports like:

$ LSAN_OPTIONS=detect-leaks=1:strip_path_prefix=$HOME/src/bash/ ./bash
-c 'BASH_ALIASES[foo]=bar'

=
==58001==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 4 byte(s) in 1 object(s) allocated from:
#0 0xd6573fa4 in malloc
(/home/parallels/bld/bash-lsan/bash+0x63fa4) (BuildId:
f61b4a39b5a8121d7dce584eee376250b9b2a92a)
#1 0xd66333dc in xmalloc xmalloc.c:107:10
#2 0xd65d3e54 in string_list_internal subst.c:2798:13
#3 0xd65d40ec in string_list subst.c:2840:11
#4 0xd65e298c in expand_subscript_string subst.c:10935:13
#5 0xd660dff4 in assign_array_element_internal arrayfunc.c:386:9
#6 0xd660dc60 in assign_array_element arrayfunc.c:353:11
#7 0xd65d74a8 in do_assignment_internal subst.c:3588:15
#8 0xd65d7810 in do_word_assignment subst.c:3646:10
#9 0xd65f7608 in do_assignment_statements subst.c:12971:14
#10 0xd65e335c in expand_word_list_internal subst.c:13049:6
#11 0xd65e3290 in expand_words subst.c:12372:11
#12 0xd659f210 in execute_simple_command execute_cmd.c:4501:15
#13 0xd659caa8 in execute_command_internal execute_cmd.c:889:4
#14 0xd663f74c in parse_and_execute builtins/evalstring.c:549:17
#15 0xd657a308 in run_one_command shell.c:1452:12
#16 0xd6577e80 in main shell.c:757:7
#17 0xbdd773f8 in __libc_start_call_main
csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#18 0xbdd774c8 in __libc_start_main csu/../csu/libc-start.c:392:3
#19 0xd654866c in _start
(/home/parallels/bld/bash-lsan/bash+0x3866c) (BuildId:
f61b4a39b5a8121d7dce584eee376250b9b2a92a)

SUMMARY: LeakSanitizer: 4 byte(s) leaked in 1 allocation(s).



Re: Various small leaks

2023-06-21 Thread Grisha Levit
On Wed, Jun 21, 2023 at 3:09 PM Chet Ramey  wrote:
> Have you considered running `make tests' on a bash binary built with this
> configuration? I'd be interested in those results.

I have, but unfortunately this generates quite a few reports that are
a little tricky to chase down.  The main difficulty is that lsan looks
for unreachable blocks (only) when the process exits so we only know
that there's a leak somewhere in e.g. the alias tests or the arith
tests but not at which line.  There's also a lot of duplication
because once a leak has occurred, lsan will end up generating the same
report for every subsequently invoked subshell.  The latter fact makes
it easy to see leaks in an interactive shell though, by putting '$(:)'
in PS1 for example.

Anyway, most of the leaks reported from running the test suite seem to
stem from longjmp's due to expansion/arithmetic/syntax errors, like:

$ ./bash -c 'let x='
bash: line 1: let: x=: arithmetic syntax error: operand expected
(error token is "=")

Direct leak of 2 byte(s) in 1 object(s) allocated from:
#1 0xdf1604ac in xmalloc xmalloc.c:107:10
#2 0xdf0ebb64 in expassign expr.c:528:13
#3 0xdf0eacd8 in expcomma expr.c:487:11
#4 0xdf0e9d3c in subexpr expr.c:468:9
#5 0xdf0e9a2c in evalexp expr.c:434:9
#6 0xdf174dac in let_builtin builtins/let.def:102:13

$ ./bash -c 'eval ": a=()"'
bash: eval: line 1: syntax error near unexpected token `('
bash: eval: line 1: `: a=()'

Direct leak of 32 byte(s) in 1 object(s) allocated from:
#1 0xe69604ac in xmalloc xmalloc.c:107:10
#2 0xe68c464c in make_bare_simple_command make_cmd.c:457:24
#3 0xe68c46f0 in make_simple_command make_cmd.c:482:17
#4 0xe68ac320 in yyparse parse.y:813:45
#5 0xe68aaea8 in parse_command eval.c:354:7
#6 0xe696c3a4 in parse_and_execute builtins/evalstring.c:440:11
#7 0xe696d4a0 in evalstring builtins/evalstring.c:831:9
#8 0xe696a638 in eval_builtin builtins/eval.def:55:18


Though here's one that's not error-related:

$ ./bash -O globstar -c 'echo **/foo*'
**/foo*

Direct leak of 8 byte(s) in 1 object(s) allocated from:
#1 0xc3320ca8 in glob_vector lib/glob/glob.c:958:31
#2 0xc3321ff8 in glob_filename lib/glob/glob.c:1328:21
#3 0xc32c0038 in shell_glob_filename pathexp.c:416:13
#4 0xc32b5448 in glob_expand_word_list subst.c:12422:17
#5 0xc32a0ac0 in expand_word_list_internal subst.c:13081:13
#6 0xc32a0914 in expand_words subst.c:12374:11
#7 0xc325dc60 in execute_simple_command execute_cmd.c:4501:15


There's quite a few others but they mostly seem harmless, things like
the case command not being freed in `case x in *) exit;; esac'.



uninitialized variable access in read_builtin

2023-06-22 Thread Grisha Levit
`read' can hit its timeout before it gets a chance to save the current
signal mask so sigprocmask can end up restoring an uninitialized
prevset. (Also all the sigprocmask calls other than the one in the jmp
target are protected by `#if defined (SIGCHLD)' so I guess this one
should be too)

Found by running the test suite on a build with clang's
MemorySanitizer enabled.  There were only two other reports, both from
quite recent additions, so I'll just mention them here:
* anonopen doesn't set *fn if memfd_create is used so uw_anonclose
frees an uninitialized pointer value later
* convert_validarray_flags_to_arrayval_flags doesn't initialize avflags

---
diff --git a/builtins/read.def b/builtins/read.def
index cb4e1e59..ecfb3d4a 100644
--- a/builtins/read.def
+++ b/builtins/read.def
@@ -428,6 +428,7 @@ read_builtin (WORD_LIST *list)
   sigemptyset (&chldset);
   sigprocmask (SIG_BLOCK, (sigset_t *)0, &chldset);
   sigaddset (&chldset, SIGCHLD);
+  sigprocmask (SIG_SETMASK, (sigset_t *)0, &prevset);
 #endif

   begin_unwind_frame ("read_builtin");
@@ -495,7 +496,9 @@ read_builtin (WORD_LIST *list)
   if (code)
{
  reset_timeout ();
+#if defined (SIGCHLD)
  sigprocmask (SIG_SETMASK, &prevset, (sigset_t *)0);
+#endif

  /* Tricky.  The top of the unwind-protect stack is the free of
 input_string.  We want to run all the rest and use input_string,



temp env vs export

2023-06-22 Thread Grisha Levit
Using `export' / `readonly' on a variable that's present in both the temp
env and a calling function's local context combines the attributes of all
the intervening scopes in the global variable:

$ declare -A v; f() { local -a v; v= e; }; e() { export v; }
$ (f; declare -p v)
declare -aAx v=([0]="")
$ (f; v=)
Segmentation fault: 11


Re: EOF at PS2

2023-06-23 Thread Grisha Levit
On Fri, May 26, 2023, 17:44 Chet Ramey  wrote:
>
> On 4/26/23 5:38 PM, Grisha Levit wrote:
> > A few issues with EOF being received at PS2:
>
> I finally had a chance to check out the ksh88 behavior on a Solaris 10 VM.
> It's pretty bizarre, but it does point to some improvements when ignoreeof
> is enabled. It seems like the POSIX committee wanted the ksh88 behavior
> without the weird aspects. I didn't check ksh93.
>
> I think we can have bash honor ignoreeof and not exit after delimiting the
> token or command while not acting quite as bizarrely as ksh88.


I think the new behavior is quite reasonable. Though it does need
something to allow the exit builtin to work even with ignoreeof, like
---
diff --git a/eval.c b/eval.c
index 8337a275..f91a09fe 100644
--- a/eval.c
+++ b/eval.c
@@ -185,7 +185,7 @@ reader_loop (void)
}
  if (EOF_Reached && interactive && ignoreeof &&
parse_and_execute_level == 0)
{
- if (handle_ignoreeof (1))
+ if (code != EXITBLTIN && handle_ignoreeof (1))
EOF_Reached = 0;
}
}



xtrace ansi quoting vs shell metas

2023-06-24 Thread Grisha Levit
When printing the command for xtrace, if a word or assignment rhs contains
a shell meta character, ansi quoting is not applied even if it otherwise
should be.

$ (set -x; : $'_\1' $'*\1') |& cat -v
+ : $'_\001' '*^A'

It seems that the order of the sh_contains_shell_metas and
ansic_shouldquote tests should be reversed in the xtrace_print_* functions
(which would match their usage in the rest of the code)
---
diff --git a/print_cmd.c b/print_cmd.c
index f8524e04..d7c5bbf9 100644
--- a/print_cmd.c
+++ b/print_cmd.c
@@ -514,10 +514,10 @@ xtrace_print_assignment (char *name, char
*value, int assign_list, int xflags)
   /* VALUE should not be NULL when this is called. */
   if (*value == '\0' || assign_list)
 nval = value;
-  else if (sh_contains_shell_metas (value))
-nval = sh_single_quote (value);
   else if (ansic_shouldquote (value))
 nval = ansic_quote (value, 0, (int *)0);
+  else if (sh_contains_shell_metas (value))
+nval = sh_single_quote (value);
   else
 nval = value;

@@ -554,15 +554,15 @@ xtrace_print_word_list (WORD_LIST *list, int xtflags)
fprintf (xtrace_fp, "''%s", w->next ? " " : "");
   else if (xtflags & 2)
fprintf (xtrace_fp, "%s%s", t, w->next ? " " : "");
-  else if (sh_contains_shell_metas (t))
+  else if (ansic_shouldquote (t))
{
- x = sh_single_quote (t);
+ x = ansic_quote (t, 0, (int *)0);
  fprintf (xtrace_fp, "%s%s", x, w->next ? " " : "");
  free (x);
}
-  else if (ansic_shouldquote (t))
+  else if (sh_contains_shell_metas (t))
{
- x = ansic_quote (t, 0, (int *)0);
+ x = sh_single_quote (t);
  fprintf (xtrace_fp, "%s%s", x, w->next ? " " : "");
  free (x);
}



leak in skip_double_quoted for funsub

2023-06-26 Thread Grisha Levit
diff --git a/subst.c b/subst.c
index 215e3469..63ca3370 100644
--- a/subst.c
+++ b/subst.c
@@ -1042,7 +1042,7 @@ skip_double_quoted (const char *string, size_t
slen, int sind, int flags)
  if (string[i + 1] == LPAREN)
ret = extract_command_subst (string, &si,
SX_NOALLOC|(flags&SX_COMPLETE));
  else if (string[i + 1] == LBRACE && FUNSUB_CHAR (string[si]))
-   ret = extract_function_subst (string, &si,
Q_DOUBLE_QUOTES, (flags & SX_COMPLETE));
+   ret = extract_function_subst (string, &si,
Q_DOUBLE_QUOTES, SX_NOALLOC|(flags&SX_COMPLETE));
  else
ret = extract_dollar_brace_string (string, &si,
Q_DOUBLE_QUOTES, SX_NOALLOC|(flags&SX_COMPLETE));



[PATCH] unwind protect for bind -x commands

2023-06-27 Thread Grisha Levit
If SIGINT is received during the execution of a bind -x command, the
memory allocated for the saved parser state is leaked and the READLINE_*
variables remain in the environment

* pcomplete.c,bashline.c:
- uw_restore_parser_state,uw_rl_set_signals: move from pcomplete.c to
  bashline.c
* bashline.h:
- extern declarations for uw_restore_parser_state, uw_rl_set_signals
* bashline.c:
- unbind_bindx_vars,uw_unbind_bindx_vars: new function to unbind shell
  variables set by bind -x commands
- bash_execute_unix_command: use unbind_bindx_vars, add unwind-protects
  for rl_set_signals, restore_parser_state, unbind_bindx_vars

The existing use of uw_restore_parser_state in gen_shell_function_matches
creates a 'restrict' copy of the pointer to the saved parser state.
I don't really understand why that is, so I didn't do that in
bash_execute_unix_command, but wanted to note here just in case.
---
 bashline.c  | 40 ++--
 bashline.h  |  3 +++
 pcomplete.c | 12 
 3 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/bashline.c b/bashline.c
index 02f36e3a..70d6778f 100644
--- a/bashline.c
+++ b/bashline.c
@@ -4428,6 +4428,34 @@ readline_set_char_offset (int ind, int *varp)
 }
 }

+void
+uw_restore_parser_state (void *ps)
+{
+  restore_parser_state (ps);
+}
+
+void
+uw_rl_set_signals (void *ignore)
+{
+  rl_set_signals ();
+}
+
+static void
+unbind_bindx_vars (void)
+{
+  check_unbind_variable ("READLINE_LINE");
+  check_unbind_variable ("READLINE_POINT");
+  check_unbind_variable ("READLINE_MARK");
+  check_unbind_variable ("READLINE_ARGUMENT");
+  array_needs_making = 1;
+}
+
+static void
+uw_unbind_bindx_vars (void *ignore)
+{
+  unbind_bindx_vars ();
+}
+
 int
 bash_execute_unix_command (int count, int key)
 {
@@ -4507,11 +4535,16 @@ bash_execute_unix_command (int count, int key)
 }
   array_needs_making = 1;

+  begin_unwind_frame ("bindx");
   save_parser_state (&ps);
+  add_unwind_protect (uw_unbind_bindx_vars, NULL);
+  add_unwind_protect (uw_restore_parser_state, &ps);
+  add_unwind_protect (uw_rl_set_signals, NULL);
   rl_clear_signals ();
   r = parse_and_execute (savestring (cmd),
"bash_execute_unix_command", SEVAL_NOHIST);
   rl_set_signals ();
   restore_parser_state (&ps);
+  discard_unwind_frame ("bindx");

   v = find_variable ("READLINE_LINE");
   maybe_make_readline_line (v ? value_cell (v) : 0);
@@ -4524,12 +4557,7 @@ bash_execute_unix_command (int count, int key)
   if (v && legal_number (value_cell (v), &mi))
 readline_set_char_offset (mi, &rl_mark);

-  check_unbind_variable ("READLINE_LINE");
-  check_unbind_variable ("READLINE_POINT");
-  check_unbind_variable ("READLINE_MARK");
-  check_unbind_variable ("READLINE_ARGUMENT");
-  array_needs_making = 1;
-
+  unbind_bindx_vars ();
   /* and restore the readline buffer and display after command execution. */
   /* If we clear the last line of the prompt above, redraw only that last
  line.  If the command returns 124, we redraw unconditionally as in
diff --git a/bashline.h b/bashline.h
index d9fb7379..2079799a 100644
--- a/bashline.h
+++ b/bashline.h
@@ -56,6 +56,9 @@ extern char **bash_default_completion (const char *,
int, int, int, int);
 extern void set_directory_hook (void);

 /* Used by programmable completion code. */
+extern void uw_rl_set_signals (void *);
+extern void uw_restore_parser_state (void *);
+
 extern char *command_word_completion_function (const char *, int);
 extern char *bash_groupname_completion_function (const char *, int);
 extern char *bash_servicename_completion_function (const char *, int);
diff --git a/pcomplete.c b/pcomplete.c
index 410a7b7d..717a1479 100644
--- a/pcomplete.c
+++ b/pcomplete.c
@@ -1030,18 +1030,6 @@ build_arg_list (const char *cmd, const char
*cname, const char *text, WORD_LIST
   return ret;
 }

-static void
-uw_restore_parser_state (void *ps)
-{
-  restore_parser_state (ps);
-}
-
-static void
-uw_rl_set_signals (void *ignore)
-{
-  rl_set_signals ();
-}
-
 /* Build a command string with
  $0 == cs->funcname (function to execute for completion list)
$1 == command name (command being completed)
-- 
2.41.0


0001-unwind-protect-for-bind-x-commands.patch
Description: Binary data


[PATCH] printing multiple heredocs in list

2023-06-29 Thread Grisha Levit
If there are multiple commands in a row that each require printing the
connector prior to the heredoc body, the connector ends up in the wrong
place for commands after the first:

fun() {
cat second);
-   PRINT_DEFERRED_HEREDOCS ("");
+   if (was_heredoc)
+ PRINT_DEFERRED_HEREDOCS ("");
printing_connection--;
break;

-- 
2.41.0



[PATCH] fix printing command after group with heredoc

2023-06-29 Thread Grisha Levit
If the last redirection list in a group / subshell / substitution has
a heredoc and the following connector is a semicolon, the connector is
incorrectly skipped over.

This happens only outside of function definitions, so I think it's an
issue only for pretty-print mode.

bash --pretty-print <<<$'(:<

[PATCH] sleep builtin signal handling

2023-06-29 Thread Grisha Levit
The sleep builtin currently doesn't do much in the way of signal
management, so for example it will return on SIGWINCH, which I think most
users will be particularly surprised by the first time they notice a sleep
ending early due to a window resize.

I looked at adding more signal handling to the current select-based
implementation but then realized it would be a lot easier to just use
nanosleep(2).

Both select() and nanosleep() first appear in 1997's XSH Issue 5 so I don't
know if we need to worry much about systems without it. (The patch leaves
the select/pselect code as fallback though).

---
diff --git a/config.h.in b/config.h.in
index c2750a2a..1b6a1853 100644
--- a/config.h.in
+++ b/config.h.in
@@ -771,6 +771,9 @@
 /* Define if you have the mkstemp function.  */
 #undef HAVE_MKSTEMP

+/* Define if you have the nonosleep function.  */
+#undef HAVE_NANOSLEEP
+
 /* Define if you have the pathconf function. */
 #undef HAVE_PATHCONF

diff --git a/configure.ac b/configure.ac
index 29f50438..de7d693c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -850,9 +850,9 @@ AC_REPLACE_FUNCS(rename)
 dnl checks for c library functions
 AC_CHECK_FUNCS(bcopy bzero clock_gettime confstr faccessat fnmatch \
getaddrinfo gethostbyname getservbyname getservent
inet_aton \
-   imaxdiv memmove pathconf putenv raise random regcomp
regexec \
-   setenv setlinebuf setlocale setvbuf siginterrupt strchr \
-   sysconf syslog tcgetattr times ttyname tzset unsetenv)
+   imaxdiv memmove nanosleep pathconf putenv raise random
regcomp \
+   regexec setenv setlinebuf setlocale setvbuf siginterrupt \
+   strchr sysconf syslog tcgetattr times ttyname tzset
unsetenv)

 AC_CHECK_FUNCS(vasprintf asprintf)
 AC_CHECK_FUNCS(isascii isblank isgraph isprint isspace isxdigit)
diff --git a/lib/sh/ufuncs.c b/lib/sh/ufuncs.c
index 247224d6..b3894450 100644
--- a/lib/sh/ufuncs.c
+++ b/lib/sh/ufuncs.c
@@ -82,7 +82,26 @@ falarm (unsigned int secs, unsigned int usecs)
 /* A version of sleep using fractional seconds and select.  I'd like to use
`usleep', but it's already taken */

-#if defined (HAVE_TIMEVAL) && (defined (HAVE_SELECT) || defined
(HAVE_PSELECT))
+#if defined (HAVE_NANOSLEEP)
+int
+fsleep(unsigned int sec, unsigned int usec)
+{
+  int r;
+  struct timespec req, rem;
+
+  req.tv_sec = sec;
+  req.tv_nsec = usec * 1000;
+
+  for (;;)
+{
+  QUIT;
+  r = nanosleep (&req, &rem);
+  if (r == 0 || errno != EINTR)
+return r;
+  req = rem;
+}
+}
+#elif defined (HAVE_TIMEVAL) && (defined (HAVE_SELECT) || defined
(HAVE_PSELECT))
 int
 fsleep(unsigned int sec, unsigned int usec)
 {
From a33289b218028b6d72966d8253646d6f174e09a5 Mon Sep 17 00:00:00 2001
From: Grisha Levit 
Date: Thu, 29 Jun 2023 21:55:05 -0400
Subject: [PATCH] fsleep: use nanosleep, handle signals

---
 config.h.in |  3 +++
 configure.ac|  6 +++---
 lib/sh/ufuncs.c | 21 -
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/config.h.in b/config.h.in
index c2750a2a..1b6a1853 100644
--- a/config.h.in
+++ b/config.h.in
@@ -771,6 +771,9 @@
 /* Define if you have the mkstemp function.  */
 #undef HAVE_MKSTEMP
 
+/* Define if you have the nonosleep function.  */
+#undef HAVE_NANOSLEEP
+
 /* Define if you have the pathconf function. */
 #undef HAVE_PATHCONF
 
diff --git a/configure.ac b/configure.ac
index 29f50438..de7d693c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -850,9 +850,9 @@ AC_REPLACE_FUNCS(rename)
 dnl checks for c library functions
 AC_CHECK_FUNCS(bcopy bzero clock_gettime confstr faccessat fnmatch \
 		getaddrinfo gethostbyname getservbyname getservent inet_aton \
-		imaxdiv memmove pathconf putenv raise random regcomp regexec \
-		setenv setlinebuf setlocale setvbuf siginterrupt strchr \
-		sysconf syslog tcgetattr times ttyname tzset unsetenv)
+		imaxdiv memmove nanosleep pathconf putenv raise random regcomp \
+		regexec setenv setlinebuf setlocale setvbuf siginterrupt \
+		strchr sysconf syslog tcgetattr times ttyname tzset unsetenv)
 
 AC_CHECK_FUNCS(vasprintf asprintf)
 AC_CHECK_FUNCS(isascii isblank isgraph isprint isspace isxdigit)
diff --git a/lib/sh/ufuncs.c b/lib/sh/ufuncs.c
index 247224d6..b3894450 100644
--- a/lib/sh/ufuncs.c
+++ b/lib/sh/ufuncs.c
@@ -82,7 +82,26 @@ falarm (unsigned int secs, unsigned int usecs)
 /* A version of sleep using fractional seconds and select.  I'd like to use
`usleep', but it's already taken */
 
-#if defined (HAVE_TIMEVAL) && (defined (HAVE_SELECT) || defined (HAVE_PSELECT))
+#if defined (HAVE_NANOSLEEP)
+int
+fsleep(unsigned int sec, unsigned int usec)
+{
+  int r;
+  struct timespec req, rem;
+
+  req.tv_sec = sec;
+  req.tv_nsec = usec * 1000;
+
+  for (;;)
+{
+  QUIT;
+  r = nanosleep (&req, &rem);
+  if (r == 0 || errno != EINTR)
+return r;
+  re

Re: [PATCH] printing multiple heredocs in list

2023-06-30 Thread Grisha Levit
On Fri, Jun 30, 2023, 16:02 Chet Ramey  wrote:

> On 6/29/23 4:01 AM, Grisha Levit wrote:
> > If there are multiple commands in a row that each require printing the
> > connector prior to the heredoc body, the connector ends up in the wrong
> > place for commands after the first:
>
> OK. The parser builds lists (connections) left-side heavy. What do you
> think of replacing the test for was_heredoc in your patch with
> (printing_connection == 1), so if we're called recursively to print a
> second connection on the left side (Connector->first), we print the here-
> document body after our caller has been able to add the connector in
> the right place.


Oh yeah that seems like a much better solution.

That way you shouldn't need to print any deferred
> here-documents in make_command_string().
>

Great, this was the part that made me think something was wrong with my
approach.

>


Completion list updates on dynamic builtin load

2023-07-04 Thread Grisha Levit
The `enabled' and `disabled' completion lists don't get updated when a
dynamic builtin is loaded:

$ compgen -A enabled tty
$ enable tty
$ compgen -A enabled tty
$

diff --git a/builtins/enable.def b/builtins/enable.def
index aa143760..399c7fa3 100644
--- a/builtins/enable.def
+++ b/builtins/enable.def
@@ -193,6 +193,8 @@ enable_builtin (WORD_LIST *list)
result = EXECUTION_FAILURE; /* normalize return value */
 #if defined (PROGRAMMABLE_COMPLETION)
   set_itemlist_dirty (&it_builtins);
+  set_itemlist_dirty (&it_enabled);
+  set_itemlist_dirty (&it_disabled);
 #endif
 }
 #endif
@@ -208,6 +210,8 @@ enable_builtin (WORD_LIST *list)
}
 #if defined (PROGRAMMABLE_COMPLETION)
   set_itemlist_dirty (&it_builtins);
+  set_itemlist_dirty (&it_enabled);
+  set_itemlist_dirty (&it_disabled);
 #endif
 }
 #endif
@@ -237,6 +241,8 @@ enable_builtin (WORD_LIST *list)
opt = r;
 #if defined (PROGRAMMABLE_COMPLETION)
  set_itemlist_dirty (&it_builtins);
+  set_itemlist_dirty (&it_enabled);
+  set_itemlist_dirty (&it_disabled);
 #endif
}
 #endif


Re: [PATCH] allow quoting completions w/o filename treatment

2023-07-05 Thread Grisha Levit
On Fri, Apr 14, 2023, 17:20 Grisha Levit  wrote:

>
> The attached patch hopefully addresses these difficulties by allowing for
> the decoupling of filename-specific match handling from match quoting.
>

Thanks very much for merging this.

It adds a new completion option `allquote' (I'm not quite happy with the
> name) which controls the corresponding new readline variable
> rl_all_quoting_desired.
>

I noticed you went with "fullquote" for the completion option name and it
occurs to me that this might be a bit confusing for users because of the
existing "complete_fullquote" shell option.

Maybe just "quote" would work? I don't mean to bikeshed this but wanted to
mention the potential overlap.

>


Re: [PATCH] leak in rl_filename_completion_function

2023-07-05 Thread Grisha Levit
On Wed, May 31, 2023 at 6:23 PM Grisha Levit  wrote:
>
> If rl_filename_rewrite_hook returns a new string for a filename (which
> I guess only happens on macOS with bash), it is in most cases not
> free-d.

> diff --git a/lib/readline/complete.c b/lib/readline/complete.c
> index bf7cc856..99556a35 100644
> --- a/lib/readline/complete.c
> +++ b/lib/readline/complete.c
> @@ -2521,6 +2521,9 @@ rl_filename_completion_function (const char
> *text, int state)
>convfn = dentry = 0;
>while (directory && (entry = readdir (directory)))
>  {
> +  if (convfn != dentry)
> +xfree (convfn);
> +
>convfn = dentry = entry->d_name;
>convlen = dentlen = D_NAMLEN (entry);
>
> @@ -2572,6 +2575,9 @@ rl_filename_completion_function (const char
> *text, int state)
>   users_dirname = (char *)NULL;
> }
>
> +  if (convfn != dentry)
> +xfree (convfn);
> +
>return (char *)NULL;
>  }
>else

I think this one might have gotten lost among the other reports, and
looking at it again it occurs to me this might be better solved by
just not allocating a new string in the filename rewrite hook since
rl_filename_completion_function will copy the string anyway.

diff --git a/bashline.c b/bashline.c
index 0e5373ab..dcf1ee1f 100644
--- a/bashline.c
+++ b/bashline.c
@@ -3279,12 +3279,7 @@ bash_directory_expansion (char **dirname)
 static char *
 bash_filename_rewrite_hook (char *fname, int fnlen)
 {
-  char *conv;
-
-  conv = fnx_fromfs (fname, fnlen);
-  if (conv != fname)
-conv = savestring (conv);
-  return conv;
+  return fnx_fromfs (fname, fnlen);
 }

 /* Functions to save and restore the appropriate directory hook */
diff --git a/lib/readline/complete.c b/lib/readline/complete.c
index d531f541..974bba5e 100644
--- a/lib/readline/complete.c
+++ b/lib/readline/complete.c
@@ -2637,9 +2637,6 @@ rl_filename_completion_function (const char
*text, int state)
   else
temp = savestring (convfn);

-  if (convfn != dentry)
-   xfree (convfn);
-
   return (temp);
 }
 }



give_terminal_to after re-backgrounded async job

2023-07-07 Thread Grisha Levit
The fix [1] for the issue reported in [2]:

+ give the terminal to pipeline_pgrp. We don't give the terminal
+ back to shell_pgrp if an async job exits because we never gave it
+ to that job in the first place. */
   if ((flags & JWAIT_NOTERM) == 0 && running_in_background == 0 &&
+  (job == NO_JOB || IS_ASYNC (job) == 0) &&
   (subshell_environment & (SUBSHELL_ASYNC|SUBSHELL_PIPE)) == 0)
  give_terminal_to (shell_pgrp, 0);

makes bash not take the terminal back when an async job is brought to the
foreground with `fg' and then subsequently TSTP-ed.

This is not immediately noticeable because if readline is in use,
yy_readline_get will shortly do it. However, with --noediting mode the
interactive shell will get EOF when reading the next command and exit.  And
even with readline, any attempt to read from the terminal prior to readline
doing it (e.g. from PROMPT_COMMAND) will result in EIO (which is how I
first noticed this).

[1]: https://git.savannah.gnu.org/cgit/bash.git/commit/?h=devel&id=a37b2af9
[2]: https://lists.gnu.org/archive/html/bug-bash/2023-01/msg00057.html


$ bash --norc --noediting
bash-5.3$ sleep 60 &
[1] 30163
bash-5.3$ fg
sleep 60
^Z # shell exits after this
[1]+ Stopped sleep 60
bash-5.3$ exit
There are stopped jobs.
bash-5.3$ exit

$ bash --norc
bash-5.3$ r() { read -r -s -dR -t1 -p$'\e[6n'; }
bash-5.3$ PROMPT_COMMAND=r
bash-5.3$ sleep 60 &
[1] 31088
bash-5.3$ fg
sleep 60
^Z
[1]+ Stopped sleep 60
bash: read: read error: 0: I/O error
bash-5.3$


[PATCH] normalization tweaks for macOS

2023-07-07 Thread Grisha Levit
A few small tweaks for the macOS-specific normalization handling to
handle the issues below:

Examples below are using the following setup:
$ mkdir -p -- $'\303\251-C' $'e\314\201-D'
$ /bin/ls -BF
é-C/   é-D/
$ LC_ALL=C /bin/ls -BF
e\314\201-D/ \303\251-C/

LC_ALL=C just to make the output clear, the shell's locale is UTF-8.
This is on APFS, which is normalization-preserving, but similar
results show up on HFS+ (which is not).  (In either case, access is
normalization insensitive).
---

When attempting to match a direntry name to the supplied pattern, the
globbing code converts the name from UTF-8-MAC encoding to (say)
UTF-8.  This is essentially an NFC normalization.  If the converted
name matches, the original name is then (correctly, I think) returned
by the glob operation:
$ LC_ALL=C printf '%q ' $'\303\251'*
$'\303\251-C' $'e\314\201-D'

However, the pattern itself does not go through normalization and
unless it is already NFC-normalized, it won't match anything.  So even
names retrieved from globbing might not match themselves:
$ for x in *; { echo "$x:" "$x"*/; }
é-C: é-C/
é-D:

Seems like it would be appropriate to normalize the pattern too.

One maybe tricky thing here is that quoted characters in the pattern
are supplied to the globbing code prefixed by a backslash, making
normalization fail to combine combining characters.  It's possible to
adjust quote_string_for_globbing to only put in backslashes when the
quoted character is special for globbing but that might complicate the
code more than necessary -- I kind of cheated by just not adding a
backslash if the quoted character is non-ASCII.  I can't think of any
way a non-ASCII character can be special in globbing code but maybe
I'm not trying hard enough.
---

Filename completion has a similar situation. The NFC form matches any
text that normalizes to it:
$ bash -in <<<$'\303\251\e*'
bash-5.3$ é-C é-D

But NFD text matches nothing, not even itself:
$ bash -in <<<$'e\314\201\e*'
bash-5.3$ é

Admittedly, filename completion does not itself produce non-NFC text
so it's less likely that this would be encountered, but normalizing
the hint text before comparing it to normalized filenames seems easy
enough.

BTW glob-expand-word can result in NFD text on the input line but that
seems correct since it's what globbing produces.
---

If filename completion is invoked through `compgen', it behaves
differently in scripts since bashline.c:initialize_readline hasn't had
a chance to set rl_filename_rewrite_hook.

$ bash -c $'compgen -f -- \303\251'
é-C
$ bash -c $'compgen -f -- e'
é-D

This can be worked around by calling `bind' manually, resulting in the
same behavior as in an interactive shell:
$ bash -c $'bind; compgen -f \303\251'
é-C
é-D
$ bash -c $'bind; compgen -f -- e'
$

..but seems safe enough to set the hook from compgen directly as well.
From 6b2c8b478e4a4077b6986ab1fc44a14c739f3459 Mon Sep 17 00:00:00 2001
From: Grisha Levit 
Date: Wed, 5 Jul 2023 22:15:34 -0400
Subject: [PATCH] fnxform tweaks

* lib/glob/glob.c
- glob_vector: apply fnx_fromfs to pattern to make sure it can match
  normalized filenames
* pathexp.c
- quote_string_for_globbing: don't convert CTLESC to backslash if it was
  quoting a non-ascii character. cheap solution to keep the bytes of
  multibyte characters together for fnx_fromfs.

* lib/readline/complete.c
- rl_filename_completion_function: if rl_filename_rewrite_hook is set,
  use it to normalize the filename we are matching

* bashline.[ch],builtins/complete.def
- set_filename_rewrite_hook: new function to set the bash-specific value
  of rl_filename_rewrite_hook, used by compgen builtin to ensure
  correct results even if readline has not be initialized
---
 bashline.c  |  6 ++
 bashline.h  |  1 +
 builtins/complete.def   |  2 ++
 lib/glob/glob.c | 11 ++-
 lib/readline/complete.c | 11 +++
 pathexp.c   |  5 -
 6 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/bashline.c b/bashline.c
index 0e5373ab..9e2e9260 100644
--- a/bashline.c
+++ b/bashline.c
@@ -3287,6 +3287,12 @@ bash_filename_rewrite_hook (char *fname, int fnlen)
   return conv;
 }
 
+void
+set_filename_rewrite_hook (void)
+{
+  rl_filename_rewrite_hook = bash_filename_rewrite_hook;
+}
+
 /* Functions to save and restore the appropriate directory hook */
 /* This is not static so the shopt code can call it */
 void
diff --git a/bashline.h b/bashline.h
index d1ef55e8..ce7dc520 100644
--- a/bashline.h
+++ b/bashline.h
@@ -57,6 +57,7 @@ extern int unbind_unix_command (char *);
 extern char **bash_default_completion (const char *, int, int, int, int);
 
 extern void set_directory_hook (void);
+extern void set_filename_rewrite_hook (void);
 
 /* Used by programmable completi

leak in command_word_completion_function

2023-07-07 Thread Grisha Levit
If there a glob is expanded to more than one result while attempting
to complete the command word, the matches are discarded but not freed.

diff --git a/bashline.c b/bashline.c
index 0e5373ab..07f38e62 100644
--- a/bashline.c
+++ b/bashline.c
@@ -2192,7 +2192,11 @@ globword:
local_index = 0;

if (glob_matches[1] && rl_completion_type == TAB) /* multiple
matches are bad */
- return ((char *)NULL);
+ {
+   strvec_dispose (glob_matches);
+   glob_matches = (char **)NULL;
+   return ((char *)NULL);
+ }
  }



[PATCH] print regerror string on regcomp error

2023-07-12 Thread Grisha Levit
Since bash-5.3 now shows an error message when a regular expression
can't be compiled, I thought it might be useful to add the
regerror()-supplied string that provides more specifics on the
failure, so we can get messages like:

$ [[ x =~ [z-a] ]]
bash: [[: invalid regular expression `[z-a]': invalid character range


0001-print-regerror-string-on-regcomp-error.patch
Description: Binary data


[PATCH 1/2] <<# indent-stripping heredoc

2023-07-13 Thread Grisha Levit
This patch implements the ksh93-style <<# redirection operator to
enable indentatable heredocs.  This (or similar) functionality has
been requested a few times, most recently discussed at
https://lists.gnu.org/archive/html/bug-bash/2021-09/msg0.html

The behavior for heredocs started with the <<# redirection operator is:

* Any leading blank characters on the first line of the heredoc define
the indentation of the heredoc
* Tab characters in indentation are interpreted as indenting to the
next 8-column-wide tab stop
* All lines of the heredoc are stripped of (up to) as many columns of
indentation as found on the first line
* The heredoc is terminated when, after any indentation has been
removed, the remaining line exactly matches the delimiter

There is a small (and not very interesting) edge case: if the heredoc
delimiter itself has leading blanks (e.g. <<# " EOF"), then the first
line cannot terminate the heredoc (even if it exactly matches the
delimiter) because any leading blanks on the first line are treated as
defining the heredoc's indentation and so cannot form part of the
line.  This matches the ksh93 behavior.

As pointed out by Lawrence Velázquez in the last discussion, <<# is
currently a syntax error except for the case of `shopt -u
interactive_comments' in an interactive shell.  I don't think it's
likely that users are turning off interactive comments and starting
heredoc delimiters with `#' but it would be a trivial change to the
code to not recognize <<# docs in that circumstance, should we want to
go that route.


0001-indent-stripping-heredoc.patch
Description: Binary data


[PATCH 2/2] <<# indent-stripping heredoc: indented printing

2023-07-13 Thread Grisha Levit
Enable prettier-printing of <<# heredocs by printing the body indented
one level further than surrounding text and the final delimiter at the
same indentation level as the surrounding text.


0002-indent-stripping-heredoc-indented-printing.patch
Description: Binary data


Re: [PATCH 1/2] <<# indent-stripping heredoc

2023-07-14 Thread Grisha Levit
On Fri, Jul 14, 2023 at 3:44 AM Martin D Kealey  wrote:
>
> On the whole I think this is great, and thankyou for working up the patch, 
> but I would like to offer some comments and suggestions:

Thanks for looking at it, feedback very much appreciated.

> One option that some other languages use is to find the terminator, and then 
> use its indentation as the pattern to remove from the content lines. The 
> problem of course is that it would take a double run over the content, but 
> the benefit is that there'd only be one in-band signaling line instead of two.

I agree, this would be useful functionality, would be interested in
seeing your implementation.

> Secondly, the battle for 8-space tabs has been well and truly lost at this 
> point, so hard-coding that constant feels like it's likely to be a source of 
> errors.
>
> In order to be tab-agnostic, I can see two reasonable options:
> 1. remove only an exact match for the sequence of whitespace characters that 
> occurs in the indicator line

I think a problem with this is that a coding style using mixed
tabs/spaces (such as the bash source..) would have

foo
  EOF

saved as:

[tab]foo
[spc][spc][spc][spc][spc][spc]EOF


so I don't think it's feasible to record the indent level as a fixed
tab/space mix if we are to allow indentation deeper into the heredoc
using the document's native style, at least not without ending up back
at the problem of needing to pick a tab stop width to use.

Personally, I don't use this indentation style (but don't wish to
precipitate an argument everyone is surely tired of) so I don't have
much of an opinion on what would be useful here.  One slight point in
favor of keeping the 8-space wide tab approach is that this is how
readline renders literal tabs as well.



printf %ls conversion

2023-07-17 Thread Grisha Levit
The mbsrtowcs call here doesn't convert the final \0 of mbs, leaving the
final byte of ws as whatever was just malloc-ed.

Noticed in an ASAN build which makes sure that this is never L'\0'.  Oddly,
this didn't actually trigger an ASAN report, just saw that nothing was
getting printed by printf.
---
diff --git a/builtins/printf.def b/builtins/printf.def
index ad4f4d12..62820514 100644
--- a/builtins/printf.def
+++ b/builtins/printf.def
@@ -1493,7 +1493,7 @@ getwidestr (size_t *lenp)
   mbs = garglist->word->word;
   slen = strlen (mbs);
   ws = (wchar_t *)xmalloc ((slen + 1) * sizeof (wchar_t));
- mblength = mbsrtowcs (ws, &mbs, slen, &state);
+ mblength = mbsrtowcs (ws, &mbs, slen + 1, &state);
   if (lenp)
 *lenp = mblength;


Re: [PATCH] print regerror string on regcomp error

2023-07-17 Thread Grisha Levit
On Mon, Jul 17, 2023, 12:09 Chet Ramey  wrote:

> I guess we'll see how much the text of regerror(3)
> error messages varies across regexp implementations.
>

Is that a concern? The GNU strings [1] are translated and even their
untranslated text does differ from that of the BSD ones [2].

The set of errors is also different, e.g. REG_EMPTY is only in BSD because
its regcomp() does not accept `()' while GNU's does.  But it's precisely
this difference that I think would help clue users into why their scripts
work on one platform and not another.


[1]:
https://sourceware.org/git/?p=glibc.git;a=blob;f=posix/regcomp.c;h=12650714c06890d99a6a7aaa96437a2f83cc973b;hb=HEAD#l132

[2]:
https://cgit.freebsd.org/src/tree/lib/libc/regex/regerror.c?id=8a16b7a18f5d0b031f09832fd7752fba717e2a97#n90


Re: [PATCH] normalization tweaks for macOS

2023-07-17 Thread Grisha Levit
On Mon, Jul 17, 2023 at 3:29 PM Chet Ramey  wrote:
>
> On 7/7/23 5:05 PM, Grisha Levit wrote:
> > A few small tweaks for the macOS-specific normalization handling to
> > handle the issues below:
>
> The issue is that the behavior has to be different between cases where
> the shell is reading input from the terminal and gets NFC characters
> that need to be converted to NFD (which is how HFS+ and APFS store them)
> and when the shell is reading input from a file and doesn't need to (and
> should not) do anything with NFD characters.

NB: while HFS+ stores NFD names, APFS preserves normalization, so we
can get either NFC or NFD text back from readdir.  Both are
normalization-insensitive: "Being normalization-insensitive ensures
that normalization variants of a filename cannot be created in the
same directory, and that a filename can be found with any of its
normalization variants." [1]

Currently, Bash never actually converts to NFD.  The fnx_tofs()
function is there but it is never used.  Instead, Bash converts
filenames to NFC with fnx_fromfs() before comparing with either the
glob pattern or the completion hint text (which is never converted).

Since access is normalization-insensitive, we just need to normalize
to _some_ form, so going to NFC is fine, but if we're going to do that
we should normalize both the filesystem name and the text being
compared.

If there's a match, globs expand to the filenames (NFC or NFD) as
returned by readdir(), and Readline completes with NFC-normalized
versions of the names.  I think this makes sense.

What doesn't work quite right currently though is that glob patterns
with NFD text never match anything, and completion prefixes with NFD
text never expand to anything.

[1]: 
https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/APFS_Guide/FAQ/FAQ.html

> Does iconv work when taking NFD input that came from the file system and
> trying to convert it to NFD (UTF-8-MAC)? I've honestly never checked.

Converting to UTF-8-MAC always normalizes to NFD:

$ printf '\303\251\0\145\314\201' | iconv -f UTF-8-MAC -t UTF-8-MAC | od -b -An
  145 314 201 000 145 314 201

$ printf '\303\251\0\145\314\201' | iconv -f UTF-8 -t UTF-8-MAC | od -b -An
  145 314 201 000 145 314 201

But Bash only converts from UTF-8-MAC to UTF-8, which always normalizes to NFC:

$ printf '\303\251\0\145\314\201' | iconv -f UTF-8-MAC -t UTF-8 | od -b -An
  303 251 000 303 251



[PATCH] fix bind -X quoting

2023-07-17 Thread Grisha Levit
The output of `bind -X' is not reusable if the bound command has quotes,
backslashes, etc.

$ bind -x '"\eX": echo "x"'
$ bind -X
"\eX": "echo \"x\""
$ bind -x "$(bind -X)"
$ bind -X
"\eX": "echo \\\"x\\\""

This patch changes rl_macro_dumper to not untranslate the macro body
when passed a negative print_readably argument.  This is technically an
API change, so maybe not the best fix, though I doubt it will impact any
real usage.
---
 bashline.c   | 2 +-
 lib/readline/bind.c  | 6 +-
 lib/readline/doc/rltech.texi | 4 +++-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/bashline.c b/bashline.c
index 2fb00e82..e2b329f9 100644
--- a/bashline.c
+++ b/bashline.c
@@ -4583,7 +4583,7 @@ print_unix_command_map (void)
   save = rl_get_keymap ();
   cmd_xmap = get_cmd_xmap_from_keymap (save);
   rl_set_keymap (cmd_xmap);
-  rl_macro_dumper (1);
+  rl_macro_dumper (-1);
   rl_set_keymap (save);
   return 0;
 }
diff --git a/lib/readline/bind.c b/lib/readline/bind.c
index ee6d6e17..bae0e6cc 100644
--- a/lib/readline/bind.c
+++ b/lib/readline/bind.c
@@ -2861,7 +2861,11 @@ _rl_macro_dumper_internal (int print_readably,
Keymap map, char *prefix)
{
case ISMACR:
  keyname = _rl_get_keyname (key);
- out = _rl_untranslate_macro_value ((char *)map[key].function, 0);
+
+ if (print_readably < 0)
+   out = savestring ((char *)map[key].function);
+ else
+   out = _rl_untranslate_macro_value ((char *)map[key].function, 0);

  if (print_readably)
fprintf (rl_outstream, "\"%s%s\": \"%s\"\n", prefix ? prefix : "",
diff --git a/lib/readline/doc/rltech.texi b/lib/readline/doc/rltech.texi
index 4d3f3cca..83a34a5f 100644
--- a/lib/readline/doc/rltech.texi
+++ b/lib/readline/doc/rltech.texi
@@ -1354,8 +1354,10 @@ use @code{rl_generic_bind()} instead.
 @deftypefun void rl_macro_dumper (int readable)
 Print the key sequences bound to macros and their values, using
 the current keymap, to @code{rl_outstream}.
-If @var{readable} is non-zero, the list is formatted in such a way
+If @var{readable} is greater than zero, the list is formatted in such a way
 that it can be made part of an @code{inputrc} file and re-read.
+If @var{readable} is less than zero, the macros are printed in "translated"
+form.
 @end deftypefun

 @deftypefun int rl_variable_bind (const char *variable, const char *value)
-- 
2.41.0



Re: [PATCH] normalization tweaks for macOS

2023-07-18 Thread Grisha Levit
On Tue, Jul 18, 2023 at 9:55 AM Chet Ramey  wrote:
> Unicode normalization on macOS has always been a pain in the ass.

I can see that!

> This is the basic assumption that drives all the decisions: character input
> you get from the terminal is in NFC, and files from the file system (names
> and usually contents) are in NFD.

I guess the point of this patch was that this assumption does not
always hold -- neither file system names nor terminal input have any
guaranteed normalization.

I'll just give one more example, sorry if this is repetitive.

In Finder, create two directories: señor and señora. List the parent
directory in a Terminal window:
$ ls
señor  señora

Type `echo ' and then copy-paste the first filename from `ls' output:
$ echo señor

Note that tab completion doesn't work. Append a `*' and run the command:
$ echo señor*
señor*

And if you're writing a script you have the same issue:

$ bash -O failglob -c 'set -- *; echo "$1"*'
bash: line 1: no match: señor*

I'm not sure if you're saying that this is correct behavior, or if
it's not worth fixing (I'm fine with that), or if there's something
wrong with my proposed fix.

> > NB: while HFS+ stores NFD names, APFS preserves normalization, so we
> > can get either NFC or NFD text back from readdir.
>
> Well, that doesn't help. But I haven't seen any NFC text coming back from
> readdir on any of my macs.
>
> > Currently, Bash never actually converts to NFD.  The fnx_tofs()
> > function is there but it is never used.  Instead, Bash converts
> > filenames to NFC with fnx_fromfs() before comparing with either the
> > glob pattern or the completion hint text (which is never converted).
>
> Correct. It's a one-way conversion, since you only have to convert one
> of the two different forms, and the current implementation works on text
> entered interactively (which is in NFC). When you're reading a script, you
> don't have to perform any conversion at all; your NFD examples all work
> fine when run from a script.
>
> > Since access is normalization-insensitive, we just need to normalize > to 
> > _some_ form, so going to NFC is fine, but if we're going to do that
> > we should normalize both the filesystem name and the text being
> > compared.
>
> The idea is that since the text entered interactively at the terminal is
> already in NFC, the curent implementation converts only what it knows is
> coming from the keyboard.

Did you mean "coming from the filesystem"?  There are two places where
fnx_fromfs is called: bash_filename_rewrite_hook, and glob_vector.
Both of these operate on directory entries, not on keyboard input.

> > If there's a match, globs expand to the filenames (NFC or NFD) as
> > returned by readdir(), and Readline completes with NFC-normalized
> > versions of the names.  I think this makes sense.
>
> Because NFC is what you get from terminal input.
>
> > What doesn't work quite right currently though is that glob patterns
> > with NFD text never match anything, and completion prefixes with NFD
> > text never expand to anything.
>
> When entered from the terminal. It goes back to the basic assumption: NFC
> is what you get from the terminal, so you have to convert from the file
> system normalization form when you're sure what you want to compare is
> coming from the terminal.



Re: [PATCH] fix bind -X quoting

2023-07-19 Thread Grisha Levit
On Wed, Jul 19, 2023 at 10:47 AM Chet Ramey  wrote:
> Thanks for the report. It seems like your patch is incomplete, though.
> After applying it:
>
> $ bind -x '"\eX": echo "x"'
> $ bind -X
> "\eX": "echo "x""
>
> We probably need to suppress printing the double quotes around `out' if
> print_readably < 0.

Oh good point.  I guess we'd need to also print a backslash if the
first character of the command is a quote.



Re: slash appended to tab so its two // at end

2023-07-19 Thread Grisha Levit
On Wed, Jul 19, 2023 at 9:07 AM Chet Ramey  wrote:
>
> On 7/18/23 10:01 PM, alex xmb ratchev wrote:
> > i in 5.2.15 bash aarch64 termux did
> >
> > $ cp -ap db2.*/
> >
> > and got
> >
> > db2.i5// db2.i7//
>
> I can't reproduce this.

This is a combination of either visible-stats or mark-directories
being on, filename completion being active, and the word being
completed ending with a slash and referring to an existing directory.

$ INPUTRC=/dev/null bash --norc
bash-5.3$ cd /tmp
bash-5.3$ mkdir bin bld
bash-5.3$ cd b*/
bin// bld//

bash-5.3$ complete -W 'bin/ bld/' -o filenames x
bash-5.3$ x b
bin// bld//

Also I think this is not a bug -- readline is asked to append a slash
to directory names and it does so.  Same thing happens with e.g. `ls',
when asked to append a slash to directory names and the supplied name
already has a slash:

$ ls -dp /var/
/var//



Re: slash appended to tab so its two // at end

2023-07-19 Thread Grisha Levit
On Wed, Jul 19, 2023 at 12:57 PM Grisha Levit  wrote:
> Also I think this is not a bug -- readline is asked to append a slash
> to directory names and it does so.

It's easy enough to avoid printing the indicator slash if we just
printed a path already ending in a slash.  It's an improvement
cosmetically but I think correctness can be argued either way.

diff --git a/lib/readline/complete.c b/lib/readline/complete.c
index 4b3a4236..d0c6a8eb 100644
--- a/lib/readline/complete.c
+++ b/lib/readline/complete.c
@@ -1059,6 +1059,10 @@ print_filename (char *to_print, char
*full_pathname, int prefix_bytes)
}

   xfree (s);
+
+  if (extension_char == '/' && (s = strrchr (to_print, '/')) && s[1] == 0)
+extension_char = 0;
+
   if (extension_char)
{
  putc (extension_char, rl_outstream);



[PATCH] add libintl dep for unwind_prot

2023-07-19 Thread Grisha Levit
(so make -j works)
diff --git a/Makefile.in b/Makefile.in
index 7f990bfa..ce909bc6 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1440,6 +1440,7 @@ sig.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h
 siglist.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h
 subst.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h
 test.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h
+unwind_prot.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h
 trap.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h
 variables.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h
 version.o: bashintl.h ${LIBINTL_H} $(BASHINCDIR)/gettext.h



Re: problem anomalies , possibly aliases related

2023-07-20 Thread Grisha Levit
On Thu, Jul 20, 2023, 01:42 alex xmb ratchev  wrote:

>
>
> 2. it says [[ ! -d then ' continue ' .. where is cp
> i call no , not c , ...
> 1. cp missing
> 2. the [[ ! -d return to continue looks bug wrong
>

Try putting the code that uses the alias into a function, and then print
the function definition. You'll see how it's being expanded.

Using Martin's example:

$ alias A='B ; C'
$ f() { D && A; }
$ declare -p -f f
f ()
{
D && B;
C
}

Sounds like you want all the commands in the alias to be executed as a
group -- so you can just write it as one:

alias bad='{ echo fail; continue; }'

>


Re: [PATCH] normalization tweaks for macOS

2023-07-20 Thread Grisha Levit
On Thu, Jul 20, 2023 at 11:54 AM Chet Ramey  wrote:
> So I'll go ahead with your patch, starting with the globbing changes.

Thanks! BTW, changing quote_string_for_globbing to skip escaping
characters that don't need it makes globbing >2x faster in the case
that most of the pattern is a quoted string.  This patch only skips
over non-ASCII characters (because that's all that was needed for the
normalization to work) but I wonder if you'd be interested in changes
to the function that would skip escaping ASCII characters that aren't
glob-special as well.  I _think_ it would just be a slight variation
of the code that already does this for regexes but I haven't thought
about the edge cases carefully yet.

mkdir /tmp/new && cd /tmp/new
touch {0001..1000}
printf -v str %1000s; export str=${str// /π}
export TIMEFORMAT=%3lR

bash-devel -c 'time for i in {1..100}; { : "$str"*; }'
0m5.549s
bash-devel -c 'time for i in {1..100}; { : $str*; }'
0m1.936s

bash-patch -c 'time for i in {1..100}; { : "$str"*; }'
0m1.948s
bash-patch -c 'time for i in {1..100}; { : $str*; }'
0m1.926s



Re: [PATCH] normalization tweaks for macOS

2023-07-25 Thread Grisha Levit
On Mon, Jul 24, 2023 at 10:12 AM Chet Ramey  wrote:
>
> On 7/20/23 7:52 PM, Grisha Levit wrote:
>
> > I wonder if you'd be interested in changes
> > to the function that would skip escaping ASCII characters that aren't
> > glob-special as well.  I _think_ it would just be a slight variation
> > of the code that already does this for regexes but I haven't thought
> > about the edge cases carefully yet.
>
> I agree that it would only be a slight variation on that code, and so I
> added that case. It will be in the next devel branch push.

Re latest changes in [1], we need to preserve quoting also for any of
the following characters, at least if they are in a bracket
expression:
! - . : = ^

Attached patch does so but is probably more complicated than necessary.

Also, there's a mention of changes to complete.def in the changelog
but it looks like the actual changes didn't make it into the commit.

[1]: 
https://git.savannah.gnu.org/cgit/bash.git/diff/pathexp.c?h=devel&id=8418224f3


0001-preserve-quoting-for-chars-special-in-bracket-expr.patch
Description: Binary data


[PATCH] read: non-raw-mode fixes

2023-07-26 Thread Grisha Levit
This patches addresses a few issues with `read' when not in raw mode.

If the last character read was an (unescaped) backslash, store it as
such instead of as a CTLESC.  Avoids:

$ printf '\\' | { read; echo "${REPLY@Q}"; }
bash: DEBUG warning: dequote_string: string with bare CTLESC
$'\001'

If an escaped null byte is read, skip it as we do with an unescaped one,
instead of adding it to input_string.  Avoids:

$ printf 'A\\\0B\nC\n' | while read; do echo "${REPLY@Q}"; done
'A'
'C'

$ printf '\\\0' | { read; echo "${REPLY@Q}"; }
bash: DEBUG warning: dequote_string: string with bare CTLESC
$'\001' # even after fix for first issue

If IFS contains \177 and the input consists of only backslash-newline
pairs and a sole \177, prevent the bare CTLNUL from being turned into an
empty string.  Avoids:

$ printf '\\\n\177' | { IFS=$'\177' read; echo "${REPLY@Q}"; }
''
---
 builtins/read.def | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/builtins/read.def b/builtins/read.def
index 5b2621fe..84878699 100644
--- a/builtins/read.def
+++ b/builtins/read.def
@@ -404,7 +404,7 @@ read_builtin (WORD_LIST *list)
   input_string[0] = '\0';

   pass_next = 0;   /* Non-zero signifies last char was backslash. */
-  saw_escape = 0;  /* Non-zero signifies that we saw an escape char */
+  saw_escape = 0;  /* Index+1 of when we last saw an escape char. */

   /* More input and options validation */
   if (nflag == 1 && nchars == 0)
@@ -751,11 +751,11 @@ read_builtin (WORD_LIST *list)
   if (pass_next)
{
  pass_next = 0;
- if (c == '\n')
+ if (c == '\n' || c == '\0')
{
  if (skip_ctlesc == 0 && i > 0)
i--;/* back up over the CTLESC */
- if (interactive && input_is_tty && raw == 0)
+ if (interactive && input_is_tty && raw == 0 && c == '\n')
print_ps2 = 1;
}
  else
@@ -769,8 +769,8 @@ read_builtin (WORD_LIST *list)
  pass_next++;
  if (skip_ctlesc == 0)
{
- saw_escape++;
  input_string[i++] = CTLESC;
+ saw_escape=i;
}
  continue;
}
@@ -783,8 +783,8 @@ read_builtin (WORD_LIST *list)

   if ((skip_ctlesc == 0 && c == CTLESC) || (skip_ctlnul == 0 && c
== CTLNUL))
{
- saw_escape++;
  input_string[i++] = CTLESC;
+ saw_escape=i;
}

 add_char:
@@ -825,6 +825,12 @@ add_char:
   if (nchars > 0 && nr >= nchars)
break;
 }
+
+  if (i && saw_escape == i && input_string[i-1] == CTLESC)
+input_string[i-1] = '\\';  /* Preserve trailing backslash */
+  else if (skip_ctlnul && i == 1 & saw_escape == 1 && input_string[0]
== CTLNUL)
+saw_escape = 0;/* Avoid dequoting bare CTLNUL */
+
   input_string[i] = '\0';
   check_read_timeout ();

-- 
2.41.0


0001-read-non-raw-mode-fixes.patch
Description: Binary data


Re: [PATCH] fix bind -X quoting

2023-07-26 Thread Grisha Levit
On Wed, Jul 26, 2023, 16:06 Chet Ramey  wrote:
>
> On 7/24/23 1:13 PM, Chet Ramey wrote:
>
> > You could do it if you allowed, say
> >
> > bind -x '"\eX": \"command with spaces\" \"x\"'
> >
> > and then stripped the backslashes before calling rl_generic_bind, but
> > that's not exactly backwards compatible either.
>
> Thinking about it some more, you can do it like this:
>
> bind -x $'"\\eX": \'"command with spaces" "x"\''
>
> since bind -x allows single-quoted strings as the command to execute,
> and $'...' allows backslash-escaped single quotes.
>
> If we ran the command string through rl_translate_keyseq, it would allow
> backslash-escaped double quotes and strip the backslashes, but you get
> the rest of the backslash processing that you probably don't want.
>
> It's just not transitive.


Another issue I didn't think of with printing the unquoted translated
command is that it can include newlines, which is a problem since you
have to read the `bind -X' output one line at a time to reuse it with
`bind -x'.

If there isn't a backwards compatible way to produce output that is
reusable given the current input format, I wonder if we can leverage a
format that's not currently valid as input.  `bind -x' currently
requires a colon following the key sequence but we could change it to
also allow input without it and use rl_macro_bind instead of
rl_generic_bind if we get such input.  If we have `bind -X' produce
untranslated output as it did before, but without the `:', everything
should match up and existing valid `bind -X' commands will be
unaffected.
---
diff --git a/bashline.c b/bashline.c
index 5dac2e9e..9d99c536 100644
--- a/bashline.c
+++ b/bashline.c
@@ -4702,7 +4702,7 @@ bind_keyseq_to_unix_command (char *line)
 {
   Keymap kmap, cmd_xmap;
   char *kseq, *value;
-  int i, kstart;
+  int i, kstart, translate;

   kmap = rl_get_keymap ();

@@ -4716,16 +4716,13 @@ bind_keyseq_to_unix_command (char *line)
   /* Create the key sequence string to pass to rl_generic_bind */
   kseq = substring (line, kstart, i);

-  for ( ; line[i] && line[i] != ':'; i++)
+  /* Advance to the colon (:) or whitespace which separates the two objects. */
+  for ( ; line[i] && line[i] != ':' && line[i] != ' ' && line[i] != '\t'; i++)
 ;
-  if (line[i] != ':')
-{
-  builtin_error (_("%s: missing colon separator"), line);
-  FREE (kseq);
-  return -1;
-}

-  i = isolate_sequence (line, i + 1, 0, &kstart);
+  translate = (line[i] != ':');
+
+  i = isolate_sequence (line, i + 1, translate, &kstart);
   if (i < 0)
 {
   FREE (kseq);
@@ -4737,7 +4734,10 @@ bind_keyseq_to_unix_command (char *line)

   /* Save the command to execute and the key sequence in the CMD_XMAP */
   cmd_xmap = get_cmd_xmap_from_keymap (kmap);
-  rl_generic_bind (ISMACR, kseq, value, cmd_xmap);
+  if (translate)
+rl_macro_bind (kseq, value, cmd_xmap);
+  else
+rl_generic_bind (ISMACR, kseq, value, cmd_xmap);

   /* and bind the key sequence in the current keymap to a function that
  understands how to execute from CMD_XMAP */
diff --git a/lib/readline/bind.c b/lib/readline/bind.c
index dc30dd84..9d4817a3 100644
--- a/lib/readline/bind.c
+++ b/lib/readline/bind.c
@@ -2861,18 +2861,12 @@ _rl_macro_dumper_internal (int print_readably,
Keymap map, char *prefix)
{
case ISMACR:
  keyname = _rl_get_keyname (key);
- if (print_readably < 0)
-   out = savestring ((char *)map[key].function);
- else
-   out = _rl_untranslate_macro_value ((char *)map[key].function, 0);
+ out = _rl_untranslate_macro_value ((char *)map[key].function, 0);

- if (print_readably < 0)
-   fprintf (rl_outstream, "\"%s%s\": %s\n", prefix ? prefix : "",
-keyname,
-out ? out : "");
- else if (print_readably > 0)
-   fprintf (rl_outstream, "\"%s%s\": \"%s\"\n", prefix ? prefix : "",
+ if (print_readably)
+   fprintf (rl_outstream, "\"%s%s\"%s \"%s\"\n", prefix ? prefix : "",
 keyname,
+print_readably > 0 ? 
":" : "",
 out ? out : "");
  else
fprintf (rl_outstream, "%s%s outputs %s\n", prefix ? prefix : "",



  1   2   3   4   5   >