Package: po4a Version: 0.36.1-1 Severity: normal Tags: patch Hi,
As the subject says, attached is a patch for improved Markdown support in the Text module. If needed, I can distill my evolutionary notes. You can also see for yourself with the following commands: git clone git://source.jones.dk/ikiwiki cd ikiwiki git log d0c079.. -- perl/Locale/Po4a/Text.pm git log -p d0c079.. -- perl/Locale/Po4a/Text.pm (the last command shows progressive patches - in case you don't know Git) Please apply this for more reliable l10n handling in the upcoming po plugin for ikiwiki. Kind regards, - Jonas -- System Information: Debian Release: squeeze/sid APT prefers unstable APT policy: (500, 'unstable') Architecture: amd64 (x86_64) Kernel: Linux 2.6.30-rc5-amd64 (SMP w/2 CPU cores) Locale: LANG=da_DK.UTF-8, LC_CTYPE=da_DK.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/bash Versions of packages po4a depends on: ii gettext 0.17-6 GNU Internationalization utilities ii libsgmls-perl 1.03ii-32 Perl modules for processing SGML p ii perl 5.10.0-22 Larry Wall's Practical Extraction ii perl-modules 5.10.0-22 Core Perl modules ii sp 1.3.4-1.2.1-47 James Clark's SGML parsing tools Versions of packages po4a recommends: ii liblocale-gettext-perl 1.05-4 Using libc functions for internati ii libterm-readkey-perl 2.30-4 A perl module for simple terminal ii libtext-wrapi18n-perl 0.06-6 internationalized substitute of Te po4a suggests no packages. -- no debconf information
--- /home/jonas/src/tmp/IKIWIKI/po4a-0.36.1/lib/Locale/Po4a/Text.pm 2009-04-05 14:10:21.000000000 +0200 +++ Text.pm 2009-05-25 22:40:39.000000000 +0200 @@ -143,6 +143,7 @@ my $paragraph=""; my $wrapped_mode = 1; my $expect_header = 1; + my $end_of_paragraph = 0; ($line,$ref)=$self->shiftline(); my $file = $ref; $file =~ s/:[0-9]+$//; @@ -152,6 +153,8 @@ $file = $1; do_paragraph($self,$paragraph,$wrapped_mode); $paragraph=""; + $wrapped_mode = 1; + $expect_header = 1; } chomp($line); @@ -464,37 +467,53 @@ $self->{indent} = $indent; $self->{bullet} = ""; } - } elsif ( $line =~ /^=*$/ - or $line =~ /^_*$/ - or $line =~ /^-*$/) { + } elsif ($line =~ /^-- $/) { + # Break paragraphs on email signature hint + do_paragraph($self,$paragraph,$wrapped_mode); + $paragraph=""; + $wrapped_mode = 1; + $self->pushline($line."\n"); + } elsif ( $line =~ /^=+$/ + or $line =~ /^_+$/ + or $line =~ /^-+$/) { $wrapped_mode = 0; $paragraph .= $line."\n"; do_paragraph($self,$paragraph,$wrapped_mode); $paragraph=""; $wrapped_mode = 1; } elsif ($markdown and + ( $line =~ /^\s*\[\[\!\S+\s*$/ # macro begin + or $line =~ /^\s*"""\s*\]\]\s*$/)) { # """ textblock inside macro end + # Avoid translating Markdown lines containing only markup + do_paragraph($self,$paragraph,$wrapped_mode); + $paragraph=""; + $wrapped_mode = 1; + $self->pushline("$line\n"); + } elsif ($markdown and ( $line =~ /^#/ # headline or $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/)) { # sole macro - # Found Markdown markup that should be preserved as a single line + # Preserve some Markdown markup as a single line do_paragraph($self,$paragraph,$wrapped_mode); $paragraph="$line\n"; $wrapped_mode = 0; + $end_of_paragraph = 1; + } elsif ($markdown and + ( $line =~ /^"""/)) { # """ textblock inside macro end + # Markdown markup needing separation _before_ this line do_paragraph($self,$paragraph,$wrapped_mode); + $paragraph="$line\n"; $wrapped_mode = 1; - $paragraph=""; - } elsif ($markdown and - ( $paragraph =~ m/^>/ # blockquote - or 
$paragraph =~ m/[<>]/ # maybe html - or $paragraph =~ m/^"""/ # textblock inside macro end - or $paragraph =~ m/"""$/)) { # textblock inside macro begin - # Found Markdown markup that might not survive wrapping - $wrapped_mode = 0; - $paragraph .= $line."\n"; } else { if ($line =~ /^\s/) { # A line starting by a space indicates a non-wrap # paragraph $wrapped_mode = 0; + } + if ($markdown and + ( $line =~ /\S $/ # explicit newline + or $line =~ /"""$/)) { # """ textblock inside macro begin + # Markdown markup needing separation _after_ this line + $end_of_paragraph = 1; } else { undef $self->{bullet}; undef $self->{indent}; @@ -510,7 +529,24 @@ # (more than 3) # are considered as verbatim paragraphs $wrapped_mode = 0 if ( $paragraph =~ m/^(\*|[0-9]+[.)] )/s - or $paragraph =~ m/[ \t][ \t][ \t]/s); + or $paragraph =~ m/[ \t][ \t][ \t]/s); + if ($markdown) { + # Some Markdown markup can (or might) not survive wrapping + $wrapped_mode = 0 if ( + $paragraph =~ /^>/ms # blockquote + or $paragraph =~ /^( {8}|\t)/ms # monospaced + or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms # Xapian macro + or $paragraph =~ /<(?![a-z]+[:@])/ms # maybe html (tags but not wiki <URI>) + or $paragraph =~ /^[^<]+>/ms # maybe html (tag with vertical space) + or $paragraph =~ /\[\[\!\S[^\]]+$/ms # macro begin + ); + } + if ($end_of_paragraph) { + do_paragraph($self,$paragraph,$wrapped_mode); + $paragraph=""; + $wrapped_mode = 1; + $end_of_paragraph = 0; + } ($line,$ref)=$self->shiftline(); } if (length $paragraph) {