Package: perl Version: 5.20.2-3+deb8u4 Severity: normal Tags: upstream patch
Dear Maintainer, There is a bug in Perl 5.8.9 (at least) that causes regular expressions an malformed UTF8 inputs to go into a forever loop and consume 100% CPU. Upstream's tracker url is https://rt.perl.org/Public/Bug/Display.html?id=123562. Patch is at http://perl5.git.perl.org/perl.git/commitdiff/22b433eff9a1ffa2454e18405a56650f07b385b5 and attached is a version rebased for Debian Jessie. I have not confirmed it, but based on the versions numbers I believe Stretch and Sid are also affected. -- System Information: Debian Release: 8.4 APT prefers stable-updates APT policy: (500, 'stable-updates'), (500, 'stable') Architecture: amd64 (x86_64) Kernel: Linux 4.4.0-0.bpo.1-amd64 (SMP w/4 CPU cores) Locale: LANG=en_US.utf8, LC_CTYPE=en_US.utf8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/dash Init: systemd (via /run/systemd/system) Versions of packages perl depends on: ii dpkg 1.17.26 ii libbz2-1.0 1.0.6-7+b3 ii libc6 2.19-18+deb8u4 ii libdb5.3 5.3.28-9 ii libgdbm3 1.8.3-13.1 ii perl-base 5.20.2-3+deb8u4 ii perl-modules 5.20.2-3+deb8u4 ii zlib1g 1:1.2.8.dfsg-2+b1 Versions of packages perl recommends: ii netbase 5.3 pn rename <none> Versions of packages perl suggests: pn libterm-readline-gnu-perl | libterm-readline-perl-perl <none> ii make 4.0-8.1 ii perl-doc 5.20.2-3+deb8u4 -- no debconf information
Index: perl-5.20.2/regexec.c =================================================================== --- perl-5.20.2.orig/regexec.c +++ perl-5.20.2/regexec.c @@ -7830,6 +7830,10 @@ S_reghop3(U8 *s, SSize_t off, const U8* if (UTF8_IS_CONTINUED(*s)) { while (s > lim && UTF8_IS_CONTINUATION(*s)) s--; + if (! UTF8_IS_START(*s)) { + dTHX; + Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)"); + } } /* XXX could check well-formedness here */ } @@ -7856,6 +7860,10 @@ S_reghop4(U8 *s, SSize_t off, const U8* if (UTF8_IS_CONTINUED(*s)) { while (s > llim && UTF8_IS_CONTINUATION(*s)) s--; + if (! UTF8_IS_START(*s)) { + dTHX; + Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)"); + } } /* XXX could check well-formedness here */ } @@ -7887,6 +7895,10 @@ S_reghopmaybe3(U8* s, SSize_t off, const if (UTF8_IS_CONTINUED(*s)) { while (s > lim && UTF8_IS_CONTINUATION(*s)) s--; + if (! UTF8_IS_START(*s)) { + dTHX; + Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)"); + } } /* XXX could check well-formedness here */ } Index: perl-5.20.2/t/re/pat.t =================================================================== --- perl-5.20.2.orig/t/re/pat.t +++ perl-5.20.2/t/re/pat.t @@ -20,7 +20,7 @@ BEGIN { require './test.pl'; } -plan tests => 726; # Update this when adding/deleting tests. +plan tests => 727; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1602,6 +1602,21 @@ EOP ok(1, "did not crash"); ok($match, "[bbb...] resolved as character class, not subscript"); } + { # Test that we handle some malformed UTF-8 without looping [perl + # #123562] + + my $code=' + BEGIN{require q(test.pl);} + use Encode qw(_utf8_on); + my $malformed = "a\x80\n"; + _utf8_on($malformed); + watchdog(3); + $malformed =~ /(\n\r|\r)$/; + print q(No infinite loop here!); + '; + fresh_perl_like($code, qr/Malformed UTF-8 character/, {}, + "test that we handle some UTF-8 malformations without looping" ); + } } # End of sub run_tests 1;