#!/bin/bash

set -e

# SD: absolute directory of this script; the patch applied below is read from it.
SD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# CD: name of the PostgreSQL checkout, created relative to the current directory.
CD=postgresql.chk
# TD: scratch directory for the HTML snapshots and their text dumps.
TD=/tmp/postgresql.chk

# Start from an empty scratch directory.
rm -rf -- "$TD"
mkdir -- "$TD"

# Fresh clone pinned to the REL9_5_3 tag.
rm -rf -- "$CD"
git clone git://git.postgresql.org/git/postgresql.git "$CD"
cd "$CD"
git checkout REL9_5_3

# Everything from here on runs inside the SGML documentation directory.
cd doc/src/sgml
# Quoted: the script may live in a directory whose path contains whitespace.
git apply "$SD/xhtml-like-dsssl.patch"

# Internal functions
###############################################################################
#######################################
# Helper for compareXhtml: dump every page of one HTML tree to plain text.
# Globals:
#   TD (read) - scratch directory; "$TD/xhtml" is used as the staging copy
# Arguments:
#   $1 - directory containing the *.html pages
# Outputs:
#   (re)creates "$1-text" with one text dump per page, bookindex.html excluded
#######################################
_xhtmlTreeToText() {
    local src=$1
    local out="$1-text"
    local page

    # Convert html to text in the same directory for inner links to be the same:
    # both trees are staged under the identical "$TD/xhtml" path before dumping.
    rm -rf -- "$TD/xhtml"
    cp -R -- "$src" "$TD/xhtml"

    rm -rf -- "$out"
    mkdir -- "$out"
    for page in "$TD"/xhtml/*.html; do
        [ -e "$page" ] || continue # glob matched nothing
        lynx "$page" --dump >"$out/${page##*/}"
    done

    # Exclude very different bookindex (-f: tolerate a tree that has none).
    rm -f -- "$out/bookindex.html"

    # Remove random generated ids from link targets.
    for page in "$out"/*.html; do
        [ -e "$page" ] || continue
        perl -i -pe 's/(\.html#)(ftn\.)?(id)?[a-z]+[0-9]+/$1/gi' "$page"
    done
}

#######################################
# Compare two HTML trees by their text rendering.
# Texts are compared because the html's differ in id's and in insignificant
# white-space characters.
# Globals:
#   TD (read) - scratch directory used for staging
# Arguments:
#   $1 - first HTML directory  (text dumps are written to "$1-text")
#   $2 - second HTML directory (text dumps are written to "$2-text")
# Outputs:
#   unified diff of the two text trees on stdout
# Returns:
#   exit status of diff (non-zero when the trees differ)
#######################################
compareXhtml() {
    _xhtmlTreeToText "$1"
    _xhtmlTreeToText "$2"

    # -b/-B ignore whitespace and blank-line changes; -I skips hunks whose
    # changed lines all match the pattern (lines made only of underscores).
    diff -u -b -B -r -I '^\s*_\+\s*$' "$1-text/" "$2-text/"
}

# Reference build: remove the previous output directory and the html-stamp
# file, run the "html" make target, then snapshot the result under $TD so the
# next build can reuse the html/ directory.
rm -rf html html-stamp
time make html # Original
rm -rf -- "$TD/html-sgml0"
cp -R html "$TD/html-sgml0"
#######################################
# Rewrite, in place, every *.html file of the snapshot taken from `make html`
# by applying the markup substitutions listed below.
# Arguments:
#   $1 - directory whose *.html files are rewritten
#######################################
normalizeDssslHtml() {
    local dir=$1
    local page
    # Each rule is one complete Perl statement. They run in the listed order
    # against the whole file at once (-0777), so patterns can match across
    # line breaks.
    local -a rules=(
        # Different "Note", "Tip", "Important", ... presentation
        -e 's/<B\s*>(Note|Tip|Important|Compatibility|Author|Release Date):\s*<\/B\s*>/\<h3>$1<\/h3>/g;'
        # Consistent numbering: "Example 39-1" -> "Example 39.1"
        -e 's/((Example|Table|Figure)\s+[A-Z0-9]+)-(\d+)/$1.$3/g;'
        # TOC item: <dt>29.1. <a>Reliability</a></dt> -> <dt><a>29.1. Reliability</a></dt>
        -e 's/(<DT\s*>)([0-9A-Z.\s]+)(<A\s+[^>]*>)/$1$3$2/g;'
        # Table cell: <TD>&nbsp</TD> -> <TD> </TD>
        -e 's/(<TD\s*>)&nbsp;(<\/TD)/$1 $2/g;'
        # refentry purpose " name -- purpose ... " -> " name &mdash; purpose ... "
        # The captures are copied to lexicals first: a successful nested
        # substitution resets $1..$4, which would otherwise lose the closing tag.
        -e 's/(<DIV\s+CLASS="(REFNAMEDIV|TOC)")(.*?)(<\/DIV\s*>)/my ($open, $body, $close) = ($1, $3, $4); $body =~ s!&nbsp;--&nbsp;! &mdash; !sg; "$open$body$close"/sge;'

        # <P><P></P></P><UL> Makes internal list non-indented (Remove empty P's) (lynx specifics)
        # The last closing tag is matched with \s* like every other tag here
        # (it used to read "Ps*", which does not accept whitespace before ">").
        -e 's/<P\s*>\s*<P\s*>\s*<\/P\s*>\s*<\/P\s*>\s*(?=<(UL|OL))//sg;'
        # <P></P></P><UL> Makes internal list non-indented (Remove empty P) (lynx specifics)
        -e 's/<P\s*>\s*<\/P\s*>\s*(?=<\/P\s*>\s*<(UL|OL))//sg;'
        # Replace <H3>Notes</H3> with <HR/>
        -e 's/<H3\s+CLASS="FOOTNOTES"\s*>\s*Notes\s*<\/H3\s*>/<HR\/>/sg;'
        # Replace <A>step N</A> with <A>Step N</A>
        -e 's/(<A\sHREF="[^"]+"\s*>)step (\d+)/$1Step $2/sg;'

        # Lower-case anchors (xplang-install.html#XPLANG-INSTALL-EXAMPLE -> xplang-install.html#xplang-install-example)
        # This pattern cannot span a newline, so whole-file mode gives the
        # same result as line-by-line processing.
        -e 's/(\.html#)([\w.-]+)/$1\L$2/g;'
    )

    for page in "$dir"/*.html; do
        [ -e "$page" ] || continue # glob matched nothing
        perl -i -0777 -p "${rules[@]}" "$page"
    done
}

normalizeDssslHtml "$TD/html-sgml0"
###############################################################################
# Second build: remove the previous output directory and the xslthtml-stamp
# file, run the "xslthtml" make target, then snapshot the result under $TD.
rm -rf html xslthtml-stamp
time make xslthtml # Fast
rm -rf -- "$TD/xhtml-sgml1"
cp -R html "$TD/xhtml-sgml1"
#######################################
# Rewrite, in place, every *.html file of the snapshot taken from
# `make xslthtml`, replacing typographic characters with plain ones.
# Arguments:
#   $1 - directory whose *.html files are rewritten
#######################################
normalizeXsltHtml() {
    local dir=$1
    local page
    # Each rule is one complete Perl statement, applied in order to the whole
    # file (-0777). The non-ASCII characters are matched as raw UTF-8 bytes.
    local -a rules=(
        # Replace non-breakable spaces with spaces (they are not present in DSSSL output)
        -e 's/\xC2\xA0/ /ig;'
        # Replace typographic single quotation marks with a plain double quote
        # NOTE(review): single quotes also become '"', not "'" - presumably to
        # match the DSSSL output; confirm.
        -e 's/(‘|’)/"/ig;'
        # Replace typographic double quotation marks with the dumb ones
        -e 's/(“|”)/"/ig;'
        # Replace typographic arrows with the plain ones
        -e 's/ → /-&gt;/ig;'
    )

    for page in "$dir"/*.html; do
        [ -e "$page" ] || continue # glob matched nothing
        perl -i -0777 -p "${rules[@]}" "$page"
    done
}

normalizeXsltHtml "$TD/xhtml-sgml1"
###############################################################################

# Diff the two normalized snapshots. The script runs under `set -e`, so a
# non-zero status from compareXhtml stops it before the confirmation below.
compareXhtml "$TD/html-sgml0" "$TD/xhtml-sgml1"

printf '%s\n' 'OK'
