On Tuesday 11 Dec 2012 19:21:53 Ralph Corderoy wrote: > Hi again, > > > The O'Reilly book members of this list re-created, _Unix Text > > Processing_, is getting some discussion on Hacker News at the moment. > > Thought it may be of interest to the list. > > http://news.ycombinator.com/item?id=4902595 > > One comment that came up there... > > Is it also possible to generate the PDF hyperlinks/outline so it is > easier to navigate the document? > > Now time's passed, should we re-issue UTP links in the PDF? I think > work by others here means that's more do-able than back then. > > Cheers, Ralph.
Hi Ralph, I thought I might have a go at this. The attached patch adds the required hyperlinks. Use like this:- [derij@pip a]$ patch -p1 <../patch-utp-1.0 patching file front.t patching file ix.macro patching file Makefile patching file mkindex.pl patching file see.terms patching file toc.awk patching file utp_book.t patching file utp.mac [derij@pip a]$ rm -rf index #optional - no longer used [derij@pip a]$ chmod +x mkindex.pl [derij@pip a]$ make This assumes the fix for ch09 has already been applied. The make now uses the gropdf driver. I rewrote the index generator completely and currently there are a few differences/glitches in the order of entries, but the content is all there. It is still a work in progress so please suggest improvements. One problem is that it fails if you use pdfroff at the moment (a ghostscript error) so I'm still looking into this. Cheers Deri
diff -uNB a/front.t b/front.t --- a/front.t 2004-06-15 03:54:30.000000000 +0100 +++ b/front.t 2013-01-09 19:18:10.926958445 +0000 @@ -6,8 +6,9 @@ .so utp.mac .utp .page iii -.ps 200 +.nop \Z@\D't 8p'@ .Hl +.nop \D't 0' .sp .6i .DS R .ps 52 Common subdirectories: a/index and b/index diff -uNB a/ix.macro b/ix.macro --- a/ix.macro 2003-07-28 02:07:15.000000000 +0100 +++ b/ix.macro 1970-01-01 01:00:00.000000000 +0100 @@ -1,4 +0,0 @@ -.de ix -.ie '\\n(.z'' .tm ix: \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 \\$9 \\n% -.el \\!.ix \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 \\$9 -.. diff -uNB a/Makefile b/Makefile --- a/Makefile 2003-07-29 04:26:26.000000000 +0100 +++ b/Makefile 2013-01-10 23:54:16.714084613 +0000 @@ -15,15 +15,15 @@ ch15.t ch16.t ch17.t ch18.t appa.t appb.t appc.t appd.t appe.t \ appf.t appg.t -utp_book.ps: toc.t utp_ix.t - $(GROFF) -step -ms -rRef=0 utp_book.t >$@.tmp +utp_book.pdf: toc.t utp_ix.t + $(GROFF) -Tpdf -P-e -P-pletter -step -ms -z -rRef=0 -dPDF.EXPORT=1 utp_book.t 2>&1 | grep '^.ds' | $(GROFF) -Tpdf -mpdfmark -P-pletter -P-e -step -ms -rRef=0 - utp_book.t >$@.tmp mv $@.tmp $@ clean:: - rm -f utp_book.ps utp_book.ps.tmp + rm -f utp_book.pdf toc.t: $(CHAPTERS) - $(GROFF) -step -ms -rRef=1 ix.macro utp_book.t >/dev/null 2>utp.aux.tmp + $(GROFF) -Tpdf -P-e -P-pletter -z -step -rpdf:bm.nr=1 -ms -rRef=1 -wall utp_book.t >/dev/null 2>utp.aux.tmp mv utp.aux.tmp utp.aux $(AWK) -f toc.awk utp.aux >$@.tmp mv $@.tmp $@ @@ -32,7 +32,7 @@ rm -f utp.aux.tmp utp.aux toc.t toc.t.tmp utp_ix.t: $(CHAPTERS) - cd index && ./make.index ../utp.aux >../$@.tmp + ./mkindex.pl utp.aux > $@.tmp mv $@.tmp $@ clean:: @@ -50,7 +50,6 @@ $(VIEW) x.ps %.pdf : %.t - groff -step -ms -rRef=0 $< > x.ps - ps2pdf x.ps + groff -step -ms -rRef=0 -Tpdf $< > x.pdf mv x.pdf $@ - $(VIEW) x.ps + $(VIEW) x.pdf diff -uNB a/mkindex.pl b/mkindex.pl --- a/mkindex.pl 1970-01-01 01:00:00.000000000 +0100 +++ b/mkindex.pl 2013-01-10 23:32:01.198153816 +0000 @@ -0,0 +1,412 @@ +#!/usr/bin/perl -w +# +# 
mkindex.pl : Make index for UTP project +# Deri James : Monday 07 Jan 2013 +# +# Input file produced by .ix macro +# + +use strict; +use constant { + TXT => 0, + PAGE => 1, + LABEL => 2, + TYPE => 3, +}; + +my (%idx,%bkm,%wds,%ltr); + +while (<>) +{ + chomp; + + next if ! s/^ix: //; + + my (@r)=split("\t"); + + # clean + + foreach (@r) + { + s/^ +//; + s/ +$//; + s/ +/ /g; + } + + # deroman + +# $r[PAGE]=1000-from_roman($r[PAGE]) if $r[PAGE]=~m/^[ivxlc]+$/; + + # range.prep + + $r[TYPE]='a'; + $r[TYPE]='b' if $r[TXT]=~s/\%begin //; + $r[TYPE]='e' if $r[TXT]=~s/\%end //; + + # rotate + + my (@tok,@keys); + + if ($r[TXT]=~m/ %key /) + { + $tok[0]=$r[TXT]; + } + else + { + (@tok)=split(' ',$r[TXT]); + } + + foreach (@tok) {s/%~/QQ1QQ/g; tr/~/ /; s/QQ1QQ/%~/g;} + + foreach my $j (0..$#tok) + { + my $key=join(' ',@tok[$j..$#tok]); + $key.=', '.join(' ',@tok[0..$j-1]) if $j>0; + + if ($r[TYPE] eq 'a') + { + push(@{$idx{$key}->{DATA}},[$r[LABEL],$r[PAGE]]); + } + elsif ($r[TYPE] eq 'b') + { + $idx{$key}->{$r[TYPE]}=[$r[LABEL],$r[PAGE]]; + } + else + { + if (exists($idx{$key}->{b})) + { + my $st=$idx{$key}->{b}->[1]; + my $lab=$idx{$key}->{b}->[0]; + $st.="\\[en]$r[PAGE]" if $r[PAGE] > $st; + push(@{$idx{$key}->{DATA}},[$lab,$st]); + delete($idx{$key}->{b}); + } + else + { + print STDERR "No matching %begin record for key '$key'\n"; + } + } + } +} + +# see terms + +if (open(F,"<see.terms")) +{ + while (<F>) + { + chomp; + + my (@r)=split("\t"); + + if ($r[2] and $r[2]=~m/\%also/) + { + push(@{$idx{"$r[0], {see also} $r[1]"}->{DATA}},[]); + } + else + { + push(@{$idx{"$r[0], {see} $r[1]"}->{DATA}},[]); + } + } + + close(F); +} + +# gen.key + +foreach my $key (keys %idx) +{ + my ($skey,$val); + + if ($key=~m/^(.*) %key (.*)/) + { + $skey=$2; + $val=$1; + $skey=~tr'/''d; + } + else + { + $skey=Clean($key); + $val=$key; + + } + + $skey="!!$skey" if $skey=~m/^[^a-zA-Z]+$/; + $skey="!$skey" if $skey=~m/^[0-9]/; + +# $skey=lc($skey); + if (exists($bkm{$skey})) + { + 
push(@{$bkm{$skey}->{DATA}},@{$idx{$key}->{DATA}}); + $bkm{$skey}->{CT}+=$#{$idx{$key}->{DATA}}+1; + print STDERR "Duplicate key '$skey' merged\n"; + } + else + { + $bkm{$skey}->{DATA}=$idx{$key}->{DATA}; + $bkm{$skey}->{CT}=$#{$idx{$key}->{DATA}}+1; + $bkm{$skey}->{AKEY}=$val; + } +} + +foreach my $key (sort {lc($a) cmp lc($b)} keys %bkm) +{ + my $skey=$key; + $skey=~tr[,][]d; + my ($wd1,$wd2)=split(' ',$skey); + $wd2='' if !defined($wd2); + my $flt=substr($wd1,0,1); + my $typ=2; + my $txt=$bkm{$key}->{AKEY}; + + # types: 1="{see...}",2=normal,4="^[...]", 3="...[...]..." + + if ($txt=~m/{see/) + { + $typ=1; + } + elsif ($txt=~m/^\[/) + { + $typ=4; + } + elsif ($txt=~m/\[.*\]/) + { + $typ=3; + } + + my $nowds=split(' ',$txt); + + $wd2=Clean($wd2); + + $wds{$wd1}->[$typ]->{WCT}++,$wds{$wd1}->[$typ]->{MINWDS}=$nowds if !exists($wds{$wd1}->[$typ]->{WORDS}->{$wd2}); + push(@{$wds{$wd1}->[$typ]->{WORDS}->{$wd2}},$bkm{$key}); + $wds{$wd1}->[$typ]->{CT}++; + $wds{$wd1}->[$typ]->{MINWDS}=$nowds if $wds{$wd1}->[$typ]->{MINWDS} > $nowds; +} + +my $lastflt=''; + +print <<'EOF'; +.ig +Index formatting macros, lifted from CSTR #128 +with slight changes to fit the UTP manual +.. +.so utp.mac +.\" Precedes each index term +.de XX +.br +.ti -.2i +.ne 2 +.. +.de ZZ +.br +.. +.\" Header between letters +.de YY +.sp 1.5 +.ne 3 +.ce +- \\$1 - +.sp .5 +.. +.Se "" "Index" NONE +.af PN 1 +.nr PS 8 +.nr VS 9 +.\" Just do one column for nroff... +.if t .2C +.na +.in .2i +.hy 0 +EOF + +foreach my $key (sort {lc($a) cmp lc($b)} keys %wds) +{ + my $flt=lc(substr($key,0,1)); + + $lastflt=$flt,print ".YY $flt ",uc($flt),"\n" if ($flt ne $lastflt and $flt ne '!'); + + foreach my $j (1..4) + { + next if !defined($wds{$key}->[$j]); + + my $wd=$wds{$key}->[$j]; + + if ($wd->{WCT}==1) + { + if ($wd->{CT}==1) + { + # simple entry - one word, one entry + + doref($wd->{WORDS},'XX',0,0,0,0); + } + else + { + # multi... 
+ + if ($wd->{MINWDS} == 2) + { + # The 2 words match the keys + + doref($wd->{WORDS},'XX',0,0,0,0); + doref($wd->{WORDS},'ZZ',2,1,9999,0); + } + else + { + # 2 words math, create a leader + + doref($wd->{WORDS},'XX',0,0,0,2); + doref($wd->{WORDS},'ZZ',2,0,9999,0); + } + } + } + else + { + # Only 1 word match + + if ($wd->{MINWDS} == 1 or exists($wd->{WORDS}->{''})) + { + # The words matches the keys + + doref($wd->{WORDS},'XX',0,0,0,0); + doref($wd->{WORDS},'ZZ',1,1,9999,0); + } + else + { + # create a leader + + doref($wd->{WORDS},'XX',0,0,0,1); + doref($wd->{WORDS},'ZZ',1,0,9999,0); + } + + } + } +} + +sub doref +{ + my $w=shift; + my $typ=shift; + my $drop=shift; + my $from=shift; + my $to=shift; + my $leader=shift; + my $outbuf=".$typ\n"; + my $j=-1; + + foreach my $key (sort {lc($a) cmp lc($b)} keys %{$w}) + { + foreach my $e (@{$w->{$key}}) + { + $j++; + next if $j < $from; + last if $j > $to; + + my $ent=$e->{AKEY}; + + if ($drop or $leader) + { + my (@l)=split(' ',$ent); + $ent=join(' ',@l[0..$leader-1]) if $leader; + $ent=join(' ',@l[$drop..$#l]) if $drop; + } + + $ent=~s/\[(.*?)\]/\\f[CW]$1\\f[P]/g; + $ent=~s/\{(.*?)\}/\\f[2]$1\\f[P]/g; + + $ent=" $ent" if $typ eq 'ZZ'; + $ent=~s/,+$//; + + my $ref=$e->{DATA}; + my $refct=0; + my $reftot=$#{$ref}; + + if (!$leader and $#{$ref->[0]} >= 0) + { + $outbuf.="$ent, \\c\n"; + foreach my $r (@{$ref}) + { + my $prefix=''; + + $prefix='-A ,' if $refct < $reftot; + $outbuf.=".pdfhref L -D $r->[0] $prefix -E -- $r->[1]\n"; + $refct++; + } + } + else + { + $outbuf.="$ent\n"; + } + } + } + print $outbuf; +} + +sub Clean +{ + my $skey=shift; + + $skey=~tr/_/0/; + + my $quoted=0; + + if ($skey=~m/%/) + { + $quoted=1; + $skey=~s/%%/QQ0QQ/g; + $skey=~s/%\[/QQ1QQ/g; + $skey=~s/%\]/QQ2QQ/g; + $skey=~s/%\{/QQ3QQ/g; + $skey=~s/%\}/QQ4QQ/g; + $skey=~s/%~/QQ5QQ/g; + } + + $skey=~s/%e/\\/g; # implement troff escape + $skey=~s/~/ /g; # remove tildes + $skey=~tr'%()/[]{}''d; # remove % and font-changing []{} + + if ($quoted) + { # 
restore literals but without escape charcter + $skey=~s/QQ0QQ/%/g; + $skey=~s/QQ1QQ/[/g; + $skey=~s/QQ2QQ/]/g; + $skey=~s/QQ3QQ/{/g; + $skey=~s/QQ4QQ/}/g; + $skey=~s/QQ5QQ/~/g; + } + + return($skey); +} + +my $end=1; + +my @trans = ( + [M => 1000], [CM => 900], + [D => 500], [CD => 400], + [C => 100], [XC => 90], + [L => 50], [XL => 40], + [X => 10], [IX => 9], + [V => 5], [IV => 4], + [I => 1], +); + +sub firstword +{ + my $wd=shift; + + my ($ret)=split(' ',$wd,2); + $ret=~s/,//; + return $ret; +} + +sub from_roman { + my $r = shift; + my $n = 0; + foreach my $pair (@trans) { + my ($k, $v) = @$pair; + $n += $v while $r =~ s/^$k//i; + } + return $n + + +} diff -uNB a/see.terms b/see.terms --- a/see.terms 1970-01-01 01:00:00.000000000 +0100 +++ b/see.terms 2013-01-10 17:39:56.513311965 +0000 @@ -0,0 +1,16 @@ +drawing [pic] preprocessor %also +extensions to [ms] macros extended ms macros +files, searching within [grep] %also +formatting defaults, [mm] [mm] macros %also +formatting defaults, [ms] [ms] macros %also +graphics [pic] preprocessor %also +integrals [eqn] +keep and release displays %also +macros [mm] and [ms] %also +[mS] macros extended [ms] macros +[nroff] formatter [troff] %also +search [grep] %also +subscripts [eqn] %also +superscripts [eqn] %also +[vi] editor, [ex] commands in [ex] %also +[view] command [vi] editor diff -uNB a/toc.awk b/toc.awk --- a/toc.awk 2003-07-28 02:07:15.000000000 +0100 +++ b/toc.awk 2013-01-10 01:36:25.779873609 +0000 @@ -7,24 +7,26 @@ print ".Se \"\" Contents NONE"; print ".af PN i"; print ".vs 12"; - print ".ta \\n(.luR"; + print ".nr llen \\n(.lu-.25i"; + print ".ta \\n[llen]uR"; print ".nf"; print ".sp 2" } /^Se:/ { - if ( $4 == "Contents" ) + if ( $5 == "Contents" ) next; gsub(/\\f\(CW/, "\\f\(CB" ); gsub(/\\f\[CW\]/, "\\f\[CB\]" ); gsub(/\\fC/, "\\f\[CB\]" ); print ".ps 12\n.sp"; - print $3"\\h'|.25i'\\fB"$4"\\fR "$2; + print $4"\\h'|.25i'\\fB\\c"; + print ".pdfhref L -D "$2" -E -- "$5"\t\\fR"$3; print ".ps 10\n.sp"; } 
/^Ah:/ { gsub(/\\f\(CB/, "\\f\(CW" ); gsub(/\\f\[CB\]/, "\\f\[CW\]" ); - print "\\h'|.25i'"$3""$2; + print ".pdfhref L -D "$2" -P \\h'|.25i' -E -- "$4""$3; } # default (skip index entries) { next; } diff -uNB a/utp_book.t b/utp_book.t --- a/utp_book.t 2003-07-29 04:25:47.000000000 +0100 +++ b/utp_book.t 2013-01-10 01:00:44.283017405 +0000 @@ -8,6 +8,14 @@ * Still a couple of minor warnings under groff 1.18 ************************************************************** .. +.nr ixno 0 1 +.ds PDFHREF.COLOUR 0.0 0.3 0.9 +.ds PDFHREF.TEXT.COLOUR pdf:href.colour +.defcolor pdf:href.colour rgb \*[PDFHREF.COLOUR] +.nr PDFOUTLINE.FOLDLEVEL 1 +.pdfinfo /Title Unix Text Processing +.pdfinfo /Author Dale Dougherty and Tim O'Reilly +.pdfview /PageMode /UseOutlines .nr chapter_page2 1 .so front.t .nr chapter_page2 1 @@ -95,3 +103,5 @@ .bp .nr chapter_page2 1 .so utp_ix.t +.pdfsync + diff -uNB a/utp.mac b/utp.mac --- a/utp.mac 2003-07-28 02:07:15.000000000 +0100 +++ b/utp.mac 2013-01-09 18:40:51.753703508 +0000 @@ -53,6 +53,7 @@ \# Special A-head for UTP \# .de utp_Ah +.pdfbookmark 2 \\$1 .sp 26p .RT .ne 6 @@ -65,7 +66,7 @@ .lg .sp 18p .ns -.if \\n[Ref] .tm Ah: \\n(PN \\$1 +.if \\n[Ref] .tm Ah: \\*[PDFBOOKMARK.NAME] \\n(PN \\$1 .. \# \# The [ABCD]-head macros @@ -94,6 +95,7 @@ .als Ah standard_Ah \# .de Bh \" B-head. $1: title +.pdfbookmark 3 \\$1 .sp 23p .RT .ne 6 @@ -163,6 +165,7 @@ .\} .ds chapter_name \\$2 .ie !'\\$1'' \{. \" If we have a section number +. pdfbookmark 1 \\$1. \\$2 . ds chapter_head \\$1 . nr is_alpha 0 . if '\\$1'A' .set_section 1 @@ -198,6 +201,7 @@ .\} .el \{. \" Illegal Chapter Appendix number . nr section 0 +. pdfbookmark 1 \\$2 . \" Might be Preface, etc. so no error diag. 
.\} .nr chapter_page2 1 \" Next page starts a chapter, so no header @@ -215,10 +219,10 @@ .nr table_num 0 \" Reset table number .format_section "\\$1" "\\$2" \\$3 \\$4 .ie '\\$1'' \{\ -.ie '\\$2'' .if \\n[Ref] .tm Se: \\n(PN \\$3 -.el .if \\n[Ref] .tm Se: \\n(PN \\$1 \\$2 +.ie '\\$2'' .if \\n[Ref] .tm Se: \\*[PDFBOOKMARK.NAME] \\n(PN \\$3 +.el .if \\n[Ref] .tm Se: \\*[PDFBOOKMARK.NAME] \\n(PN \\$1 \\$2 .\} -.el .if \\n[Ref] .tm Se: \\n(PN \\$1 \\$2 +.el .if \\n[Ref] .tm Se: \\*[PDFBOOKMARK.NAME] \\n(PN \\$1 \\$2 .. \# \# Set section number for alphabet chapters (appendices) @@ -363,7 +367,7 @@ . if !'\\$1'R' .if !'\\$1'r' .if !'\\$1'' \ . tm Ls: Need A, a, B, N, R, or r as type .nr l\\n+[l0] 0 1 -.ie '\\$1'' \{. \" set defaults +.ie '\\$1'' \{. \" set defaults . if '\\n[l0]'1' .af l\\n[l0] 1 \"numberic at 1st level . if '\\n[l0]'2' .af l\\n[l0] a \"alpha at 2nd level . if '\\n[l0]'3' .af l\\n[l0] i \"roman at 3rd level @@ -384,7 +388,7 @@ . if '\\$1'r' .af l\\n[l0] i .\} .ie !'\\$2'' .nr i\\n[l0] \\$2 \"set list indent -.el .nr i\\n[l0] 5 \"default indent +.el .nr i\\n[l0] 5 \"default indent .RS .. .de Li \" List start $1 == 0: no blank line preceding @@ -695,6 +699,16 @@ 'po \\n[PO]u 'sp |\\n[page-end]u .. +.de ix +.ie '\\n(.z'' \{\ +. if !'\\$1'%end' \{\ +. ds ixbk ix:bm\\n+[ixno] +. pdfhref M -N \\*[ixbk] +. \} +. if \\n[Ref] .tm ix: \\$* \\n% \\*[ixbk] +.\} +.el \\!.ix \\$* +.. \# \# Set defaults for UTP \#