Package: mc
Version: 3:4.8.21-1
Followup-For: Bug #628908

I am not sure if this is the same problem, but I have a 72MB archive
(~2GB before compression), compressed into tar.zst, with about 1.1M
files (0 - 100 bytes each) and about 120 characters in the path of each file.

Listing content is fast:

$ time zstdcat 
../gnuplot-fuzzing-instrumented-snapshot-2018-11-27T01\:12.tar.zst | tar tf - | 
wc -l
1139010

real    0m4.619s
user    0m3.935s
sys     0m4.387s
$

With metadata:

$ time zstdcat 
../gnuplot-fuzzing-instrumented-snapshot-2018-11-27T01\:12.tar.zst | tar tvf -  
| wc -l
1139010

real    0m10.418s
user    0m6.176s
sys     0m8.681s
$

Via index file:

$ time zstdcat 
../gnuplot-fuzzing-instrumented-snapshot-2018-11-27T01\:12.tar.zst | tar -t -v 
--index-file=/tmp/zlk -f -  | wc -l
0

real    0m9.234s
user    0m5.837s
sys     0m5.323s
$ wc -l /tmp/zlk 
1139010 /tmp/zlk
$


Yes, mc is stuck for 20 minutes when trying to enter the same archive.
mc is using 100% CPU (one thread). No tar or zstd processes are
to be found, though.

strace -p `pidof mc`

shows an enormous number of lseeks and small reads:

lseek(9, 512, SEEK_CUR)                 = 1075074048
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075075072
read(9, "././@LongLink\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075077120
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075078144
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075079168
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075080192
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075081216
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075082240
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075083264
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075084288
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075085312
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075086336
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075087360
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075088384
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075089408
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075090432
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075091456
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075092480
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075093504
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075094528
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075095552
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075096576
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075097600
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075098624
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075099648
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075100672
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075101696
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075102720
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075103744
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075104768
read(9, "././@LongLink\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075106816
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075107840
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075108864
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075109888
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075110912
read(9, "././@LongLink\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075112960
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075113984
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075115008
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075116032
read(9, "././@LongLink\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075118080
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075119104
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075120128
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075121152
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075122176
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075123200
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 1024, SEEK_CUR)                = 1075124736
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075125760
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075126784
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075127808
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075128832
read(9, "././@LongLink\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
lseek(9, 512, SEEK_CUR)                 = 1075130880
read(9, "././@LongLink\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512
read(9, "gnuplot-fuzzing-instrumented/out"..., 512) = 512

In /tmp/mc-user/ I found this file

$ ls -lh /tmp/mc-user/sfsB89YSZ 
-rw------- 1 user user 1.9G Nov 27 00:12 /tmp/mc-user/sfsB89YSZ
$ file /tmp/mc-user/sfsB89YSZ
/tmp/mc-user/sfsB89YSZ: POSIX tar archive (GNU)
$

So I am guessing mc unpacked the file (instead of just streaming the
data on the fly) and is now trying to read the metadata on its own, without
the help of tar, or using a worse method than tar (which finishes in 10
seconds):

$ time tar tf /tmp/mc-user/sfsB89YSZ | wc -l
1139010

real    0m3.664s
user    0m2.777s
sys     0m2.644s
$
$ time tar tvf /tmp/mc-user/sfsB89YSZ | wc -l
1139010

real    0m10.144s
user    0m4.809s
sys     0m6.990s
$



PS. All files (/tmp and the source file) are in RAM on unionfs/tmpfs. I am
running off a live CD with the entire file system in memory.




-- System Information:
Debian Release: buster/sid
  APT prefers testing
  APT policy: (500, 'testing')
Architecture: amd64 (x86_64)
Foreign Architectures: i386

Kernel: Linux 4.18.0-2-amd64 (SMP w/32 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8), 
LANGUAGE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /usr/bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled

Versions of packages mc depends on:
ii  libc6         2.27-8
ii  libext2fs2    1.44.4-2
ii  libglib2.0-0  2.58.1-2
ii  libgpm2       1.20.7-5
ii  libslang2     2.3.2-1+b1
ii  libssh2-1     1.8.0-2
ii  mc-data       3:4.8.21-1

Versions of packages mc recommends:
ii  mime-support  3.61
ii  perl          5.28.0-3
ii  unzip         6.0-21

Versions of packages mc suggests:
ii  arj                              3.10.22-17
ii  atril [pdf-viewer]               1.20.2-1
ii  bzip2                            1.0.6-9
pn  dbview                           <none>
pn  djvulibre-bin                    <none>
ii  file                             1:5.34-2
ii  genisoimage                      9:1.1.11-3+b2
pn  gv                               <none>
ii  imagemagick                      8:6.9.10.14+dfsg-7
ii  imagemagick-6.q16 [imagemagick]  8:6.9.10.14+dfsg-7
pn  libaspell-dev                    <none>
ii  links                            2.17-1
pn  odt2txt                          <none>
ii  poppler-utils                    0.69.0-2
ii  python                           2.7.15-3
pn  python-boto                      <none>
ii  python-tz                        2018.7-1
ii  texlive-binaries                 2018.20180907.48586-2
ii  zip                              3.0-11+b1

-- no debconf information

Reply via email to