#!/usr/bin/perl -w
#
#	pdfman		: given a troff document man page add pdf meta
#	Deri James	: Wed 05 Mar 2014
#

use strict;

my (@lines)=(<>);
chomp(@lines);
my $title='';
my @levels=(0,0,0,0);
my @manpath;
my $mdocpunct=".,:;()[]?!";
my $bmct=0;
my %bm;
my $web=exists($ENV{MY_URL})?$ENV{MY_URL}:'';
$web.='/' if $web and substr($web,-1) ne '/';
my $pod2man=0;
my $InVB=1;
my $TitleNm='';
my $new=$ENV{PDFMAN_NEW} || 1;

BuildManPath();

print ".ds PDFHREF.COLOUR 0.0 0.3 0.9\n.defcolor pdf:href.colour rgb \\*[PDFHREF.COLOUR]\n.ds PDFHREF.TEXT.COLOUR pdf:href.colour\n";

foreach my $j (0..$#lines)
{
    my $l=$lines[$j];
    next if !defined($l);
    
    $pod2man=1 if $j < 15 and $l=~m/pod::man/i;
    
    if ($l=~m/^\.\\["#]/)
    {
	Put($l);
	next;
    }
    
    my $foundur=0;
    $InVB=1 if $l=~m/^\.Vb/;
    $InVB=0 if $l=~m/^\.Ve/;
    
    # man
    
    if ($l=~m/^\.SH (.*)/)
    {
	Over(1,$1);
	Put($l);
	next;
    }
    elsif ($l=~m/^\.Sh (.*)/)
    {
	if ($1 eq 'NAME')
	{
	    Put($l);
	    Over(1,$1,1);
	    next;
	}
	
	Over(1,$1,1);
	Put($l);
	next;
    }
    elsif ($l=~m/^\.SS (.*)/)
    {
	Over(2,$1);
	Put($l);
	next;
    }
    elsif ($l=~m/^\.Ss (.*)/)
    {
	my $t=$1;
	$t=$1 if ($t=~m/"(.*?)"/);
	Over(2,$t,1);
	Put($l);
	next;
    }
    elsif ($l=~m/^\.T[Pp]/)
    {
	Over(-3,$lines[$j+1]);
	Put($l);
	next;
    }
    elsif ($l=~m/^\.IX Item (.*)/)
    {
	# Used by Pod2man
	Over(-4,$1);
	Put($l);
	next;
    }
    elsif ($l=~m/^\.IX Title "(.*) (\d)"/)
    {
	# Used by Pod2man
	$TitleNm=$1;
	Put($l);
	next;
    }
    elsif ($l=~m/^\.TH (\S+) (\S+)/)
    {
	$title=".pdfinfo /Title $1($2)\n";
	Put($l);
	next;
    }
    
    if ($l=~m/^\.UR (\S+)/)
    {
	my $desc='';
	my $dest=$1;
	$dest=~s/\\\://g;
	my $affix='';
	$foundur=1;
	
	for (my $i=$j+1;$i<$j+4;$i++)
	{
	    if ($lines[$i]=~m/^\.(?:ME|UE) ?(.*)/)
	    {
		$affix=" -A \"$1\"" if $1;
		$lines[$i]=undef;
		last;
	    }
	    else
	    {
		$desc.=' ' if $desc;
		$desc.=$lines[$i];
		$lines[$i]=undef;
	    }
	}
	
	$l=qq(.pdfhref W -D "$dest"${affix} -- $desc);
	Put($l);
	next;
    }
    
    if ($l=~m/^\.MT (\S+)/)
    {
	my $desc='';
	my $dest=$1;
	$dest=~s/\\\://g;
	my $affix='';
	
	for (my $i=$j+1;$i<$j+10;$i++)
	{
	    if ($lines[$i]=~m/^\.(?:ME|UE) ?(.*)/)
	    {
		$affix=" -A \"$1\"" if $1;
		$lines[$i]=undef;
		last;
	    }
	    else
	    {
		my $l=$lines[$i];
		$l=~s/\... //;
		$desc.=' ' if $desc;
		$desc.=$l;
		$lines[$i]=undef;
	    }
	}
	
	$l=qq(.pdfhref W -D "mailto:$dest"${affix} -- $desc);
	Put($l);
	next;
    }
    
    if ($l=~s/^\.Xr (\w+) (\S+) ?(.*?)\n?/ManURL('M','',$1,$2,$3)/e) {Put($l); next;};
    if ($l=~s/^\.Sx (.+)\n?/PDFlink($1,$1)/e) {Put($l); next;};
    if ($l=~s[(\S*)(http(?:s?)://)(\S+)][WWW($1,$2,$3)]gem) {Put($l); next;};
    
    if ($new==1)
    {
	
# 	if ($l=~m/^\./) {Put($l), next;}
	
	my (@wds)=split(' ',$l);
	my $res='';
	my $inst=(substr($wds[0],0,1) eq '.')?1:0;
    
	foreach my $j (0..$#wds)
	{
	    my $wd=$wds[$j];
	    next if !defined($wd);
	    my $started=length($res);
	    $started=0 if $started and substr($res,-1) eq "\n";

	    if ($inst and $j<$#wds and substr($wds[$j+1],0,1) eq '(')
	    {
		my $t1=$wd.$wds[$j+1];
		my $pre=Parse2(\$t1);
		my $t2=$t1;
		
		if ($t1=~m/^(\w+)\((\d\w?)\)(.*)$/)
		{
		    $t2=URLMan('M',$started,$pre,$1,$2,$3);
		}

		if ($t2 ne $t1)
		{
		    $res.=$t2;
		    $wds[$j+1]=undef;
		}
		else
		{
		    $res.=' ' if $started;
		    $res.=$wd;
		}
		next;
	    }

	    my ($pre,$w,$post)=Parse($wd);
	    
	    if ($w=~m/^(\w+)\((\d\w?)\)$/)
	    {
		$res.=URLMan('M',$started,$pre,$1,$2,$post);
		next;
	    }
	    
	    if (!$InVB and $pod2man)
	    {
		if ($w=~m/^(perl\w+)$/)
		{
		    $res.=URLMan('P2',$started,$pre,$1,1,$post);
		    next;
		}
		
		if ($w=~m/^(\w+(?:::\w+)+)$/)
		{
		    $res.=URLMan('P',$started,$pre,$1,3,$post);
		    next;
		}
	    }
	    
	    $res.=' ' if $started;
	    $res.=$wd;
	}
	
	Put($res);
	next;
    }
    
    my $txted=0;

    if (!$InVB and $pod2man and $l!~m/^\./)
    {
	while ($l=~s/(?<!-- )((?:\\\&)?(?: |\\f[RBI]|\\f\(..))?(?<!\/)(perl\w+)(?!\w|\()(\S*)\s*(.*)/ManURL('P2',$1,$2,1,$3,$4)/gem)
	{
	    $txted=0;
	}
    }
    
    while ($l=~s/(.*?)(?<!\\f.)\b((?:\\f.)?\w+(?:\\f.|\\\|))\((\d\w?)\)(?!:DUFF:)(\S+)?\s*(.*)/ManURL('M',$1,$2,$3,$4,$5)/ge)
    {
	$txted=0;
    }
    
    if (!$InVB and $pod2man and $l!~/^\./)
    {
	while ($l=~s/(.*?)(?<!-- |..\/|.::)\b((?:\\f[BIR])?\w+(?:::\w+)+)(?!\w|\))(\S*)\s*(.*)/ManURL('P',$1,$2,3,$3,$4)/gem)
	{
	    $txted=0;
	}
    }
    

    $l="$1\n$2" if $l=~m/(.+)(\.pdf.^)/; 
    $l=~s/\n\n/\n/g;
    my $txt=Clean($l);

#     print STDERR "Cln:$txt\n";
    
    while ($txt=~s/(.*?)\b(\w+)\((\d\w?)\)(?!:DUFF:)(\S+)?\s*(.*)/ManURL('M',$1,$2,$3,$4,$5)/ge)
    {
	$txted=1;
    }
    
    if (!$InVB and $pod2man and $l!~/^\./)
    {
	while ($txt=~s/(.*?)(?<!-- |..\/|.::)\b(\w+(?:::\w+)+)(?!\w|\))(\S*)\s*(.*)/ManURL('P',$1,$2,3,$3,$4)/gem)
	{
	    $txted=1;
	}
# 	$txt=~s/:DUFF:/::/g;
    }
    
    if ($txted)
    {
	Put($txt);
    }
    else
    {
	Put($l);
    }
}

print ".pdfview /PageMode /UseOutlines\n";
print $title if $title;

sub URLMan
{
    my $type=shift;
    my $started=shift;
    my $pre=shift;
    my $cmd=shift;
    my $osec=shift;
    my $post=shift;
    my $sec=$osec;
    my $res;
    my $st=($started)?" \\c":"\\c";
    
    if (!($sec=CheckMan($cmd,$sec)))
    {
	$pre=" $pre" if $started;
	return("$pre$cmd($osec)$post") if $type eq 'M';
	return("$pre$cmd$post");
    }
    
    $pre=" -P $pre" if $pre;
    $post=" -A $post" if $post;
    
    if ($type eq 'P')
    {
	$res=qq($st\n.pdfhref W -D "${web}man:/${sec}/$cmd"$pre${post} -- $cmd\n);	
    }
    else
    {
	$res=qq($st\n.pdfhref W -D "${web}man:/${sec}/$cmd"$pre${post} -- $cmd($sec)\n);	
    }
    
    return($res);
}

sub Parse
{
    my $w=shift;
    
    my $pre='';
    my $post='';
    
    while (my $p=Parse2(\$w)) {$pre.=$p};
    my ($wd,$t)=split(/\\/,$w,2);
    $t="\\$t" if $t;
    $wd='' if !defined($wd);
    if (substr($wd,-1) eq ',')
    {
	chop($wd);
	$post.=',';
    }
    while (my $p=Parse2(\$t)) {$post.=$p};
    
    return($pre,$wd,$post);
}

sub Parse2
{
    my $w=shift;
    
    return '' if !defined($$w);
    return '' if $$w!~m/^\\/;
    return $1 if $$w=~s/^(\\s(?:'[-0-9.]+?.?'|[-0-9]+))//;
    return $1 if $$w=~s/^(\\f(?:B|I|R|P|\(..|\[.*?\]))//;
    return $1 if $$w=~s/^(\\(?:\&|\||\%))//;
    return $1 if $$w=~s/^(\\.(?:'.*?'|\(..|\[.*?\]))//;
    return '';
}
sub Put
{
    my $txt=shift;
    
    $txt=~s/:DUFF://g;
    $txt=~s/:\\&:/::/g;
    
    $txt.="\n" if substr($txt,-1) ne "\n";
    print "$txt";
    
}

sub WWW
{
    my $prefix=shift || '';
    my $proto=shift || '';
    my $url=shift || '';
    my $affix='';
    $affix=$1 if $url=~s/([\].,)>!?]+)$//;
    $prefix="-P \"$prefix\"" if $prefix;
    $url="$proto$url";
    $affix=qq(-A "$affix\\c") if $affix;
    return("\\c\n.pdfhref W -D \"$url\" $affix $prefix -- $url\n");
}

sub Over
{
    my $lev=shift;
    my $txt=Clean(shift);
#    my $txt=shift;
    my $nm=shift || 0;
    my $tag='';
    
    $txt=~s/\\f\(..//g;
    $txt=~s/\\f\[.*?\]//g;
#     $txt=~s/\(/\\\(/g;
#     $txt=~s/\)/\\\)/g;
    $txt=~s/\\(?>!e)/\\e\\/g;
    $txt=substr($txt,0,-1) if substr($txt,-1) eq '\\';
    $txt=~s/\\\(en|\\\[en\]/-/g;
    $txt=~s/\\-/-/g;
    $txt=~s/\\\*/\\e\*/g;#
    $txt=~s/\\ / /g;
    
    if ($txt=~m/^\\\\&\.(ESC.*)/)
    {
# 	print ".ds _deri \\*[$1]\n.substring _deri 5\n.pdfclean _deri\n.tm dj \\*[_deri]  :  \\*[pdfcleaned]\n";
    }
    if ($txt=~m/^\\\\&\.ESC.*? (.*)/)
    {
	$txt="$1";
# 	$txt=~s/\\/\\\\\\\\/g;
	$txt="\\\\&\\e$txt";
    }
    $txt="\\\\&$1" if $txt=~m/^\\\\&\.\w* (.*)/;
    
    $txt=~s/^"(.*)"$/$1/;
    
    my $sgn=($lev<0)?-1:1;
    $lev=abs($lev);
    
    if ($nm)
    {
	$tag=$txt;
	$tag=~tr[ ()[]/][_];
# 	$tag="pdf:cst".++$bmct;
# 	$bm{$txt}=$tag;
	$tag=qq(-T $tag);
    }
    
    $levels[$lev]=1;
    
    foreach my $j ($lev+1..$#levels) {$levels[$j]=0;};
    
    if ($lev > 1 and $levels[$lev-1]==0)
    {
	$lev--;
    }
    
    $lev*=$sgn;
    
    $txt="\\&$txt";
    $txt=~s/\\e/\\\\\\\\e/g;
    $txt=~s/\(/\\\\\\\(/g;
    $txt=~s/\)/\\\\\\\)/g;
    
    print ".pdfbookmark $tag $lev \"$txt\"\n";
}

sub Clean
{
    my $t=shift;
    
    $t=~s/\\\%//g;
    $t=~s/\\f(?:B|I|R|P|\[.*?\]|\(..)(?:\\\|)?//g;
   
#    $t=~s/\\e//g;

    if ($t=~m/^\.(BI|IB|RI|IR|BR|RB|B|I|FN) (.*)/)
    {
	return($2);
# 	return(join('',WordSplit($2,' ')));
    }
    
    $t=~s/^(?:\.pdf)(\.\S+ )/\\&$1/;
    
    return($t);
}

sub WordSplit
{
    my $line = shift;
    my $sep = (shift or ',');
    
    return () unless $line;
    
    my @cells;
    $line =~ s/\r?\n$//;
    
    my $re = qr/(?:^|$sep)(?:"([^"]*)"|([^$sep]*))/;
    
    while($line =~ /$re/g) {
	my $value = defined $1 ? $1 : $2;
	push @cells, (defined $value ? $value : '');
	$cells[$#cells].=' ' if substr($cells[$#cells],-1) eq '\\';
    }
    
    return @cells;
}

sub ManURL
{
    my $type=shift;
    my $before=shift || '';
    my $nm=shift;
    my $osec=shift || '';
    my $suffix=shift || '';
    my $rest=shift || '';
    my $sec=$osec;
    my $res;
    
    return('') if !defined($nm);
    
    $before.="\n" if ($before and $before ne "\n");

    if (!($sec=CheckMan($nm,$sec)))
    {
	return("$before$nm($osec\\&)$suffix$rest\n") if $type eq 'M';
	$nm=~s/::/:\\&:/g;
	$nm=~s/perl/p\\&erl/;
	return("$before$nm$suffix $rest");
    }
    
    $suffix=" -A $suffix" if $suffix;
    $rest="\n$rest" if $rest;
    
    if ($type eq 'P')
    {
#	$nm=~s/::/:\\&:/g;
	$res=qq(${before}.pdfhref W -D "${web}man:/${sec}/$nm"${suffix} -- $nm$rest);	
    }
    else
    {
	$res=qq(${before}.pdfhref W -D "${web}man:/${sec}/$nm"${suffix} -- $nm($sec):DUFF:$rest);	
    }
    
    return($res);
}

sub BuildManPath
{
    if (open(F,"</etc/man.config"))
    {
	while (<F>)
	{
	    chomp;
	    
	    if (substr($_,0,8) eq "MANPATH\t")
	    {
		push(@manpath,substr($_,8));
	    }
	}
    }
}

sub CheckMan
{
    my $nm=shift;
    my $sec=shift;
    
    $nm=~s/\\f.//g;
    $nm=~s/\\\|//g;
    
    return 0 if $nm eq $TitleNm;
    
    foreach my $path (@manpath)
    {
	my (@r)=glob qq("$path/man$sec*/${nm}.*");
	
	if ($#r > -1)
	{
	    $r[0]=~m/$path\/man(.+?)\/$nm/;
	    return $1;
	}
    }
    
    return 0;
}

sub PDFlink
{
    my $dest=shift;
    my $text=shift || $dest;
    my $suffix='';
    
    my (@cells)=WordSplit($text,' ');
    
    while (index($mdocpunct,$cells[$#cells]) >= 0)
    {
	$suffix=pop(@cells).$suffix;
    }
    
    $text=join(' ',@cells);
    $dest=$text;
    $dest=~s/^\"?(.*)\"?/$1/;
    $dest=~tr[ ()[]/][_];
    
    $dest=$bm{$dest} if (exists($bm{$dest}));
    $suffix=qq(-A "$suffix" ) if $suffix;
    
    return(".pdfhref L -D $dest ${suffix} -- $text");
}

