This is current/amd64.

On UTF-8 input, awk segfaults when using a multi-character RS:

$ cat /tmp/in
č

$ hexdump -C /tmp/in
00000000  c4 8d 0a                                          |...|
00000003

$ cat /tmp/in | awk '{print$1}'
č

$ cat /tmp/in | awk -v RS=x '{print$1}'
č

$ cat /tmp/in | awk -v RS=xy '{print$1}'
Segmentation fault (core dumped)

See the debug (awk -d3) output below.

It does not happen with plain ASCII input.
It happens reproducibly if the input contains a UTF-8 character.

According to the manpage,

         RS     Input record separator (default newline).
                If empty, blank lines separate records.
                If more than one character long, RS is treated
                as a regular expression, and records are separated
                by text matching the expression.


So I *speculate* that the regexp matching is what breaks
when matching against non-ascii input; that's a bit over my head,
but the last debug message before the segfault is "reparse <xy>".

Jan


Script started on Mon Jul 13 12:58:39 2020
hans@biblio:~$ cat /tmp/in | awk -d3 -v RS=xy '{print$1}'

awk version 20200702
setsymtab set 0x1e34d6ca8e00: n=RS s="xy" f=0 t=2
starting setsval 0x1e34d6ca8e00: RS = "xy", t=2, r,f=0,0
setsval 0x1e34d6ca8e00: RS = "xy (0x1e34deeddd10) ", t=2 r,f=0,0
command line set RS to |xy|
program = |{print$1}|
setsymtab set 0x1e34d256d0c0: n=0 s="0" f=0 t=17
setsymtab set 0x1e34d256d840: n=$zero&null s="" f=0 t=17
setsymtab set 0x1e34d6ca8a00: n=FS s=" " f=0 t=6
setsymtab found 0x1e34d6ca8e00: n=RS s="xy" f=0 t=2
setsymtab set 0x1e3479030080: n=OFS s=" " f=0 t=6
setsymtab set 0x1e3479030c00: n=ORS s="
" f=0 t=6
setsymtab set 0x1e34d256d340: n=OFMT s="%.6g" f=0 t=6
setsymtab set 0x1e34e3d38cc0: n=CONVFMT s="%.6g" f=0 t=6
setsymtab set 0x1e34e3d38e00: n=FILENAME s="" f=0 t=6
setsymtab set 0x1e34d256d780: n=NF s="" f=0 t=1
setsymtab set 0x1e34790308c0: n=NR s="" f=0 t=1
setsymtab set 0x1e34d6ca84c0: n=FNR s="" f=0 t=1
setsymtab set 0x1e34d6ca8980: n=SUBSEP s="" f=0 t=6
setsymtab set 0x1e34e3d38040: n=RSTART s="" f=0 t=1
setsymtab set 0x1e34d256d4c0: n=RLENGTH s="" f=0 t=1
setsymtab set 0x1e34d256d900: n=SYMTAB s="" f=0 t=20
argc=1, argv[0]=awk
setsymtab set 0x1e34d6ca89c0: n=ARGC s="" f=1 t=1
setsymtab set 0x1e34e3d38d80: n=ARGV s="" f=0 t=20
setsymtab set 0x1e34d256d280: n=0 s="awk" f=0 t=2
setsymtab set 0x1e34d256dc80: n=ENVIRON s="" f=0 t=20
setsymtab set 0x1e3479030680: n=_ s="/usr/bin/awk" f=0 t=2
setsymtab set 0x1e34e3d38100: n=LOGNAME s="hans" f=0 t=2
setsymtab set 0x1e34d256db00: n=WINDOWID s="60817421" f=6.08174e+07 t=3
setsymtab set 0x1e3479030640: n=XTERM_SHELL s="/bin/ksh" f=0 t=2
setsymtab set 0x1e34e3d38ac0: n=HOME s="/home/hans" f=0 t=2
setsymtab set 0x1e34d256d880: n=CVSROOT s="[email protected]:/cvs" f=0 t=2
setsymtab set 0x1e34e3d38740: n=XTERM_VERSION s="XTerm/OpenBSD(351)" f=0 t=2
setsymtab set 0x1e34d256d400: n=LC_MONETARY s="C" f=0 t=2
setsymtab set 0x1e34d6ca8dc0: n=LANG s="C" f=0 t=2
setsymtab set 0x1e34d256d100: n=MANPATH s="/home/hans/man:/usr/local/man:/usr/share/man:/usr/X11R6/man" f=0 t=2
setsymtab set 0x1e3479030d80: n=SSH_AUTH_SOCK s="/tmp/ssh-IQXXB2Cn9jHz/agent.92135" f=0 t=2
setsymtab set 0x1e34790300c0: n=PAGER s="less" f=0 t=2
setsymtab set 0x1e34d256d8c0: n=CVS_RSH s="ssh" f=0 t=2
setsymtab set 0x1e34d6ca8480: n=SHELL s="/bin/ksh" f=0 t=2
setsymtab set 0x1e34d256de80: n=PS1 s="\u@\h:\W$ " f=0 t=2
setsymtab set 0x1e34e3d38880: n=TERM s="xterm-color" f=0 t=2
setsymtab set 0x1e34e3d38b40: n=XTERM_LOCALE s="cs_CZ.UTF-8" f=0 t=2
setsymtab set 0x1e34e3d386c0: n=LC_NUMERIC s="C" f=0 t=2
setsymtab set 0x1e34d6ca8a80: n=WINDOWPATH s="ttyC4" f=0 t=2
setsymtab set 0x1e3479030dc0: n=PWD s="/home/hans" f=0 t=2
setsymtab set 0x1e34e3d38a00: n=XENVIRONMENT s="/home/hans/.Xresources" f=0 t=2
setsymtab set 0x1e34790302c0: n=LC_CTYPE s="en_US.UTF-8" f=0 t=2
setsymtab set 0x1e3479030e40: n=DISPLAY s=":0" f=0 t=2
setsymtab set 0x1e34e3d38440: n=LESS s="-g -i -M -R" f=0 t=2
setsymtab set 0x1e3479030b40: n=SSH_AGENT_PID s="86537" f=86537 t=3
setsymtab set 0x1e34d6ca8b40: n=LC_TIME s="C" f=0 t=2
setsymtab set 0x1e34d6ca8f40: n=BASH_ENV s="/home/hans/.shrc" f=0 t=2
setsymtab set 0x1e3479030c40: n=LC_MESSAGES s="C" f=0 t=2
setsymtab set 0x1e34d256dbc0: n=ENV s="/home/hans/.shrc" f=0 t=2
setsymtab set 0x1e3479030c80: n=PATH s="/home/hans/bin:/home/hans/bin:/bin:/usr/bin:/sbin:/usr/sbin:/usr/X11R6/bin:/usr/local/bin:/usr/local/sbin" f=0 t=2
setsymtab set 0x1e34d6ca8ec0: n=HISTFILE s="/home/hans/.history" f=0 t=2
setsymtab set 0x1e34d256dd80: n=USER s="hans" f=0 t=2
lex token 123
adjbuf gettok: 5 100 (pbuf=0x1e34deedd500, tbuf=0x1e34de2a3480)
lex PRINT
lex INDIRECT
setsymtab set 0x1e34e3d38e40: n=1 s="1" f=1 t=11
lex NUMBER
lex token 59
lex token 125
errorflag=0
RS=<xy>, FS=< >, ARGC=1, FILENAME=
getsval 0x1e34d6ca8a00: FS = "  (0x1e34f11bd350)", t=6
argno=1, file=||
getsval 0x1e34d6ca8e00: RS = "xy (0x1e34deeddd10)", t=2
reparse <xy>
Segmentation fault (core dumped) 
hans@biblio:~$ ^D


Reply via email to