Am 30.07.2015 um 20:42 schrieb Brandon McCaig:
> I'll give you my 2 cents for whatever that's worth. >:)
Thanks for your many comments.
The updated full project code is below. Feedback is appreciated. (you
also find this at github.com/simon0x5b/bookmark-djvu)
> I can't help thinking that the entire recursive subroutine could
> be simplified and tidied up a bit by changing the direction of
> it
could you elaborate on this?
Cheers, Simon
file bookmark-djvu-extract:
#!/usr/bin/env perl
# LICENSE: GPLv3+
use 5.010;
use warnings;
use strict;
use utf8;
use Getopt::Long qw/:config no_ignore_case bundling/;
use File::Basename qw/basename/;
use Encode qw/decode/;
if (!eval 'use YAML::XS qw/Dump/; 1;') {
die "cannot find the YAML::XS Perl module.\n" .
"Try '\$ apt-get install libyaml-libyaml-perl'.\n";
}
binmode STDOUT, ':encoding(UTF-8)';
binmode STDIN, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';
my $prog_name = basename $0;
my $bug_address = "www.github.com/simon0x5b/bookmark-djvu/issues";
sub print_usage {
say "Usage: $prog_name [OPTIONS] DJVU [-o BOOKMARKS_FILE]";
}
sub print_help {
print_usage ();
say "
extract the outline of DJVU.
The format of the generated BOOKMARKS-FILE is described in the README.
Options:
-o, --output=FILE write bookmarks to FILE
-s, --simple-format dump outline in simple format (default: YAML)
-h, --help print this help screen
-V, --version print program version
The default for BOOKMARKS-FILE is DJVU-FILENAME with the suffix changed to
- '.outline' for YAML mode (default).
- '.bm' for simple format.
Report bugs to $bug_address";
}
sub print_version {
say "bookmark-djvu 0.1
License GPLv3+: GNU GPL version 3 or later
<http://gnu.org/licenses/gpl.html>.
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law."
}
$_ = decode ('UTF-8', $_) for @ARGV;
my $output_filename;
my $use_simple_format = 0;
my %opt_hash = (
"output|o=s" => \$output_filename,
"simple-format|s" => \$use_simple_format,
"help|h" => sub {print_help (); exit 0},
"version|V" => sub {print_version (); exit 0},
);
print_usage (), exit (1) unless GetOptions (%opt_hash) and @ARGV;
my $djvu_filename = $ARGV[0];
if (! -f $djvu_filename) {
print_usage ();
die "error: djvu file '$djvu_filename' does not exist\n";
}
if (!$output_filename) {
my $suffix = $use_simple_format ? ".bm" : ".outline";
$output_filename =
get_default_filename ($djvu_filename, ".djvu", $suffix);
if (-e $output_filename) {
die "won't overwrite existing file '$output_filename'. " .
"Use the '-o FILE' option!";
}
}
open my $output_handle, ">", $output_filename
or die "cannot open file '$output_filename': $!";
# start extracting bookmarks with djvused in a pipe
open my $djvused_handle, "-|", ("djvused", $djvu_filename, "-u", "-e",
"print-outline") or die "cannot find djvused.\n" .
"Try '\$ apt-get install djvulibre-bin'.\n";
binmode $djvused_handle, ':encoding(UTF-8)';
my $outline_items = parse_djvused_output ($djvused_handle);
unless (@{$outline_items}) {
warn "no outline found\n";
exit 1;
}
close $djvused_handle
or die "djvused failed with exit status ", $? >> 8, "\n";
if ($use_simple_format) {
dump_to_simple_format ($output_handle, $outline_items);
}
else {
dump_to_yaml ($output_handle, $outline_items);
}
# arg: open file descriptor to djvused
# returns: arrayref of hashrefs, each describing level, title and page of an
# outline item.
sub parse_djvused_output {
my $djvused_handle = shift;
my $level = 0;
my $line = <$djvused_handle>;
if (!$line or $line !~ /^\(bookmarks$/) {
warn "no bookmarks found\n";
return [];
}
my $outline_items = [];
while ($line = <$djvused_handle>) {
chomp $line;
$line =~ /^\s*\("(?<title>.+)"$/
or die "line '$line' in djvused output does not match.
Please report to $bug_address\n";
my $title = djvused_unescape ($+{title});
$line = <$djvused_handle>;
$line =~ /\s*"#(?<page>[0-9]+)"(?<close>(\s*\))*)\s*$/
or die "line '$line' in djvused output does not match.
Please report to $bug_address\n";
push @$outline_items, {level => $level, title => $title,
page => $+{page}};
$level -= ($+{close} =~ tr/\)//) - 1;
}
return $outline_items;
}
sub dump_to_simple_format {
my $out_handle = shift;
my $outline_items = shift;
binmode $out_handle, ':encoding(UTF-8)';
for my $outline (@$outline_items) {
print {$out_handle} " "x($outline->{level}),
prepare_title ($outline->{title}),
, " $outline->{page}\n";
}
}
sub prepare_title {
my $title = shift;
if ($title =~ /\n/) {
warn "WARNING: removing newline in title '$title'.\n";
$title =~ s/\n/ /g;
}
if ($title =~ /(\.|\s)+$/) {
warn "WARNING: removing trailing dots and/or whitespace " .
"in title '$title'.\n";
$title =~ s/(\.|\s)+$//;
}
return $title;
}
sub dump_to_yaml {
my $out_handle = shift;
my $input_array = shift;
(my $array, undef) = get_recursive_array ($input_array, 0);
print {$out_handle} Dump ($array);
}
# recursively convert flat array into nested data structure
# args: input array ref, index to start
# returns: output array ref, following index
sub get_recursive_array {
my ($input_array, $index) = @_;
my $last = $#{$input_array};
my $base_level = $input_array->[$index]{level};
my $result_array = [];
while ($index <= $last) {
my $node = $input_array->[$index];
my $current_level = $node->{level};
last if $current_level < $base_level;
my $outline_item = {title => $node->{title}, page => $node->{page}};
my $next_index = $index + 1;
my $next_level = $input_array->[$next_index]{level};
if ($next_index <= $last && $next_level > $base_level) {
($outline_item->{kids}, $index) =
get_recursive_array ($input_array, $next_index);
}
else {
++$index;
}
push @{$result_array}, $outline_item;
}
return ($result_array, $index);
}
sub djvused_unescape {
# see in '$ man djvused': "DJVUSED FILE FORMATS - Strings"
# and perlrebackslash
my $string = shift;
$string =~ s/\\([0-7]{3}|.)/expand_escape_sequence ($1)/ge;
return $string;
}
sub expand_escape_sequence {
my ($string) = @_;
if ($string eq "\\" || $string eq '"') {
return $string;
}
elsif (length ($string) == 3) {
# octal escape sequence
return pack 'U', oct ($string);
}
elsif ($string eq "a") {
return "\a";
}
elsif ($string eq "b") {
return "\b";
}
elsif ($string eq "t") {
return "\t";
}
elsif ($string eq "n") {
return "\n";
}
elsif ($string eq "v") {
return "\x{0b}";
}
elsif ($string eq "f") {
return "\f";
}
elsif ($string eq "r") {
return "\r";
}
else {
die "unknown escape sequence '\\$string' in '$string'
please report to $bug_address";
}
}
sub get_default_filename {
my $filename = shift;
my $from = shift;
my $too = shift;
die "get_default_filename needs arg" unless defined ($too);
my $result_filename;
if ($filename =~ /\Q$from\E$/) {
($result_filename = $filename) =~ s/\Q$from\E/$too/;
}
else {
$result_filename = $filename . $too;
}
return $result_filename;
}
file bookmark-djvu:
#!/usr/bin/env perl
# License: GPLv3+
use 5.010;
use warnings;
use strict;
# do not use floating point arithmetic in divisions
use integer;
use utf8;
use Encode qw/decode/;
use Getopt::Long qw/:config no_ignore_case bundling/;
use File::Temp qw/tempfile/;
use File::Basename qw/basename/;
if (!eval 'use YAML::XS qw/Load/; 1;') {
die "cannot find the YAML::XS Perl module.\n" .
"Try '\$ apt-get install libyaml-libyaml-perl'.\n";
}
binmode STDOUT, ':encoding(UTF-8)';
binmode STDIN, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';
my $prog_name = basename $0;
my $bug_address = "www.github.com/simon0x5b/bookmark-djvu/issues";
sub print_usage {
say "Usage: $prog_name [OPTIONS] DJVU [BOOKMARKS-FILE]";
}
sub print_help {
print_usage ();
say "
update the outline of DJVU.
The format of BOOKMARKS-FILE is described in the README.
Options:
-o, --output=FILE do not modify DJVU and write the
bookmarked djvu file to FILE.
-s, --simple-format use simple format (default: YAML)
options controlling simple format:
-c, --allow-comments ignore all lines starting with a hash
character (#) in BOOKMARKS-FILE.
general options:
-h, --help print this help screen.
-V, --version print program version.
The default for BOOKMARKS-FILE is DJVU with the suffix changed to
- '.outline' for YAML mode (default).
- '.bm' for simple format.
Report bugs to $bug_address";
}
sub print_version {
say "bookmark-djvu 0.1
License GPLv3+: GNU GPL version 3 or later
<http://gnu.org/licenses/gpl.html>.
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law."
}
$_ = decode ('UTF-8', $_) for @ARGV;
my $output_filename;
my $use_simple_format = 0;
my $allow_comments = 0;
my %opt_hash = (
"output|o=s" => \$output_filename,
"simple-format|s" => \$use_simple_format,
"allow-comments|c" => \$allow_comments,
"help|h" => sub {print_help (); exit},
"version|V" => sub {print_version (); exit},
);
print_usage () , exit (1) unless GetOptions (%opt_hash) && @ARGV;
my $djvu_filename = $ARGV[0];
if (! -f $djvu_filename) {
print_usage ();
die "error: djvu file '$djvu_filename' does not exist\n";
}
$output_filename = $djvu_filename unless $output_filename;
my $bookmarks_filename;
if ($ARGV[1]) {
$bookmarks_filename = $ARGV[1];
}
else {
my $suffix = $use_simple_format ? ".bm" : ".outline";
$bookmarks_filename =
get_default_filename ($djvu_filename, ".djvu", $suffix);
}
open my $input_handle, '<', $bookmarks_filename
or die "cannot open $bookmarks_filename: $!\n";
my $page_count = page_count ($djvu_filename);
my $djvused_outline;
if ($use_simple_format) {
$djvused_outline = simple_format_to_djvused_outline ($input_handle);
}
else {
$djvused_outline = yaml_to_djvused_outline ($input_handle);
}
# run external commands
if ($djvu_filename ne $output_filename) {
system_wrapper ("cp", $djvu_filename, $output_filename);
}
system_wrapper ("djvused", $output_filename, "-e",
"set-outline $djvused_outline", "-s");
# argument: djvu filename
# returns: page count
sub page_count {
my $filename = shift;
open my $djvused_handle, "-|", "djvused", $filename, "-e", "n"
or die "cannot find djvused.\n" .
"Try '\$ apt-get install djvulibre-bin'.\n";
my $count = <$djvused_handle>;
if (!close $djvused_handle) {
die "djvused failed with status ", $? >> 8, ".\n";
}
chomp ($count);
return $count;
}
# returns: filename
sub simple_format_to_djvused_outline {
my $input_handle = shift;
binmode $input_handle, ':encoding(UTF-8)';
(my $out_fh, my $out_filename) = tempfile (UNLINK => 1);
binmode $out_fh, ':encoding(UTF-8)';
my $regexp = qr/
^
(?<indent>(\ {4})*)
(?<title>.+?)
(\s|\.)+
(?<page>-?[0-9]+)
\s*$
/x;
my $line_number = 0;
my $offset = 0;
my $indent_depth;
my $prev_indent_depth = -1;
print {$out_fh} "(bookmarks";
while (my $line = <$input_handle>) {
chomp ($line);
++$line_number;
# skip blank lines or comments
next if $line =~ /^\s*$/ || ($allow_comments && $line =~ /^#/);
# check for offset marker
if ($line =~ /^\s*d=(?<difference>-?[0-9]+)\s*$/) {
$offset = $+{difference};
next;
}
# match line against $regexp
if ($line !~ $regexp) {
die "error: line $line_number is broken:\n$line\n";
}
$indent_depth = length ($+{indent}) / 4;
my $title = $+{title};
my $page = $+{page} + $offset;
if ($page < 1 || $page > $page_count) {
die "page number $page in line $line_number out of range ".
"(page count: $page_count)\n";
}
if ($indent_depth > $prev_indent_depth + 1) {
die "too mutch indentation in line $line_number:\n$line\n";
}
$title = djvused_escape ($title);
# print closing parenthesis for the previous entry
print {$out_fh} ")"x($prev_indent_depth - $indent_depth + 1), "\n";
$prev_indent_depth = $indent_depth;
# print out this outline item
print {$out_fh} qq{("$title" "#$page"};
}
if ($prev_indent_depth == -1) {
warn "removing outline\n";
}
# print closing parenthesis
print {$out_fh} ")"x($prev_indent_depth + 1), "\n", ")", "\n";
# flush buffer
close $out_fh;
return $out_filename;
}
# arg: input filedescriptor
# returns output filename
sub yaml_to_djvused_outline {
my $input_handle = shift;
(my $out_fh, my $out_filename) = tempfile (UNLINK => 1);
binmode $out_fh, ':encoding(UTF-8)';
my $outline = Load (do {local $/; local $_ = <$input_handle>});
print {$out_fh} "(bookmarks";
if ($outline) {
array_ref_to_djvused_outline ($out_fh, $outline);
}
else {
warn "no input, removing outline\n";
}
print {$out_fh} "\n)\n";
return $out_filename;
}
# used by yaml_to_djvused_outline
# prints djvused outline recursively from array ref
# args: output file descriptor, arrayref
sub array_ref_to_djvused_outline {
my $output_handle = shift;
my $outline = shift;
unless (ref ($outline) eq "ARRAY") {
die "array_ref_to_djvused_outline: arg not an array ref";
}
for my $hashref (@{$outline}) {
my $title = $hashref->{title} or die "missing title in outline hash";
$title = djvused_escape ($title);
my $page = $hashref->{page};
if ($page < 1 || $page > $page_count) {
die "page number $page out of range ".
"(page count: $page_count)\n";
}
# print out this outline item
print {$output_handle} qq{\n("$title" "#$page"};
if ($hashref->{kids}) {
array_ref_to_djvused_outline ($output_handle, $hashref->{kids});
}
print {$output_handle} ")";
}
}
sub djvused_escape {
# see djvused(1): "DJVUSED FILE FORMATS - Strings"
# and perlrebackslash
my $title = shift;
if ($title =~ /\n/) {
die "djvused_escape: \\n in '$title'\n" .
"Please report this at $bug_address\n"
}
# need to escape \, ", \a, \x{08}, \t, \x{0b}, \f and \r
# \ is first, since it is contained in the replacement texts
$title =~ s/\\/\\\\/g;
$title =~ s/"/\\"/g;
$title =~ s/\a/\\a/g;
$title =~ s/\x{08}/\\b/g;
$title =~ s/\t/\\t/g;
$title =~ s/\x{0b}/\\v/g;
$title =~ s/\f/\\f/g;
$title =~ s/\r/\\r/g;
return $title;
}
sub system_wrapper {
warn "command: @_\n";
system (@_) == 0
or die "error: system failed: $?\n"
}
sub get_default_filename {
my $filename = shift;
my $from = shift;
my $too = shift;
die "get_default_filename needs arg" unless defined ($too);
my $result_filename;
if ($filename =~ /\Q$from\E$/) {
($result_filename = $filename) =~ s/\Q$from\E/$too/;
}
else {
$result_filename = $filename . $too;
}
return $result_filename;
}
file t/test:
#!/usr/bin/env perl
# LICENSE: GPLv3+
use 5.014;
use warnings;
use strict;
use File::Temp qw/tempfile/;
use utf8;
use Test::More;
if (!eval 'use YAML::XS qw/LoadFile/; 1;') {
die "cannot find the YAML::XS Perl module.\n" .
"Try '\$ apt-get install libyaml-libyaml-perl'.\n";
}
use constant GET_OUTLINE => "../bookmark-djvu-extract";
use constant SET_OUTLINE => "../bookmark-djvu";
# create empty djvu
my $djvu = create_djvu (8);
# ################################# #
# compare files after set and get: #
# ################################# #
system_ok (SET_OUTLINE . " -s $djvu input1");
(undef, my $outline) = tempfile (UNLINK => 1);
system_ok (GET_OUTLINE . " -s $djvu -o $outline");
# compare
files_ok ($outline, "expected1");
unlink ($outline);
# default filenames
$djvu = create_djvu (8);
$outline = ($djvu =~ s/\.djvu$/.bm/r);
say "outline: $outline";
system_ok ("cp input1 $outline");
# compare files after set and get:
system_ok (SET_OUTLINE . " -s $djvu");
unlink ($outline);
system_ok (GET_OUTLINE . " -s $djvu");
# compare
files_ok ($outline, "expected1");
unlink ($outline);
# #### #
# YAML #
# #### #
$djvu = create_djvu (5);
system_ok (SET_OUTLINE . " $djvu input1.yaml");
(undef, $outline) = tempfile (UNLINK => 1);
system_ok (GET_OUTLINE . " $djvu -o $outline");
yaml_ok ("input1.yaml", $outline);
# default filenames
$djvu = create_djvu (5);
$outline = ($djvu =~ s/\.djvu$/.outline/r);
say "outline: $outline";
system_ok ("cp input1.yaml $outline");
# compare files after set and get:
system_ok (SET_OUTLINE . " $djvu");
unlink ($outline);
system_ok (GET_OUTLINE . " $djvu");
# compare
yaml_ok ("input1.yaml", $outline);
unlink ($outline);
# ############### #
# remove outlines #
# ############### #
$djvu = create_djvu (5);
system_ok (SET_OUTLINE . " $djvu input1.yaml");
# empty input file
system_ok (SET_OUTLINE . " $djvu /dev/null");
system_is (GET_OUTLINE . " $djvu -o /dev/null", 1);
#simple format
$djvu = create_djvu (5);
system_ok (SET_OUTLINE . " $djvu input1.yaml");
# empty input file
system_ok (SET_OUTLINE . " $djvu -s /dev/null");
system_is (GET_OUTLINE . " $djvu -o /dev/null", 1);
done_testing ();
sub yaml_ok {
my ($file1, $file2) = @_;
is_deeply (LoadFile ($file1), LoadFile ($file2),
"yaml_ok: file1=$file1, file2=$file2");
}
sub system_ok {
my ($command) = @_ ;
ok (system ($command) == 0, "command: $command");
}
sub system_is {
my ($command, $exit_status) = @_;
system ($command);
# see 'perldoc -f system'
ok ($? != -1
&& !($? & 127)
&& $? >> 8 == $exit_status,
"exit status of '$command' is '$exit_status'");
}
sub files_ok {
system_ok ("diff -C 3 $_[0] $_[1]");
}
# arguments: page count, suffix
# returns filename
sub create_djvu {
my ($page_count, $suffix) = @_;
$suffix or $suffix = ".djvu";
$page_count > 0 or $page_count = 1;
# create one-page djvu file
(undef, my $filename) = tempfile (UNLINK => 1, SUFFIX => $suffix);
system_wrapper ("djvumake $filename INFO=100,100,100");
my $page;
if ($page_count > 1) {
(undef, $page) = tempfile (UNLINK => 1, SUFFIX => ".djvu");
system_wrapper ("djvumake $page INFO=100,100,100");
}
for (2..$page_count) {
system_wrapper ("djvm -i $filename $page");
}
return $filename;
}
sub system_wrapper {
my ($command) = @_;
system ($command) == 0
or die "command '$command' failed with status ", $? >> 8, "\n";
}
--
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
http://learn.perl.org/