Hi Holger, here is the final patch now; I think it can be committed. I removed some redundancy and added a few comments. The "*-manual.xml" file is the same as with the older patch.
I checked the pdf, everything seems to be ok (and the extra spaces are gone :-) ). Regards, Andi PS: Do not forget to run find . -name "*.po" | xargs sed -i "s/[ \t]*<\//<\//g" after applying!
Index: documentation/scripts/get_images =================================================================== --- documentation/scripts/get_images (revision 62761) +++ documentation/scripts/get_images (working copy) @@ -41,12 +41,12 @@ } sub replace () { - open(FILE, "< $file") or die "Can't open $file perhaps not in the correct dir?"; + open(FILE, "< $file") or die "Can't open $file perhaps not in the correct dir? Error: $!"; undef $/; my $local = <FILE>; close(FILE); # look for images... - if ( $local =~ "m#<imagedata fileref='/".$path."/\w+\?action=AttachFile&do=get&target=([^']+)'/>#" ) { + if ( $local =~ "m#<imagedata fileref='/".$path."/\\w+\?action=AttachFile&do=get&target=([^']+)'/>#" ) { # ..and replace the paths $local =~ s#<imagedata fileref='([^'"<>]+)'/>#create($1)#eg; open(FILE, "> $file") or die "Can't open $file for writing"; Index: documentation/scripts/get_manual =================================================================== --- documentation/scripts/get_manual (revision 62761) +++ documentation/scripts/get_manual (working copy) @@ -36,7 +36,7 @@ # the last but one sed "preserves" the 2nd matched regex # the last sed does the same as dos2unix # head at the end chops of the last two lines with the Category:Permalink entry -GET "${url}AllInOne?action=raw"|sed "s%<<Include(%%g" | sed "s%)>>%%g" | sed "s%$path1%%g" |sed 's/.$//'|head -n -2> id +GET -H User-Agent: "${url}AllInOne?action=raw"|sed "s%<<Include(%%g" | sed "s%)>>%%g" | sed "s%$path1%%g" |sed 's/.$//'|head -n -2> id for i in `cat id` ; do TARGET=`echo "${i}" |sed "s/\(.*\)\/\(.*\)/\2/" `.xml @@ -51,23 +51,24 @@ # - the last sed command deletes the first 4 lines GET "${url}${i}?action=show&mimetype=text/docbook" | sed "s/\$DEBIAN_EDU_DOC_BUILDDATE/<code>$DEBIAN_EDU_DOC_BUILDDATE<\/code>/" | + # replace tags: sed "s%code>%computeroutput>%g" | sed "s%/htdocs/rightsidebar/img/%./images/%g" | + # remove final tag: sed "s%</article>%%" | - sed "s#</revhistory>##g" | - sed 
"s%</authorinitials>%%" | - sed "s#<revremark>\(.*\)</revremark>##g" | - sed "s#<authorinitials>\(.*\)</authorinitials>##g" | - sed "s#</articleinfo>##g" | - sed "s#</revision>##g" | + # remove tags and enclosed content: + sed "s#<articleinfo>\(.*\)</articleinfo>##g" | sed "s%<para><ulink url='http://wiki.debian.org/CategoryPermalink#'>CategoryPermalink</ulink> </para>%%" | - sed "s%<\/%\n<\/%g" | + # introduce line breaks: sed "s%<title>%\n<title>%g" | + sed "s%<\/title>%\n<\/title>%g" | sed "s%<section>%\n\n<section>%g" | + sed "s%<\/section>%\n<\/section>%g" | sed "s%<para>%\n<para>%g" | - sed "s%</date>\(.*\)\$%%g" | + sed "s%<\/para>%\n<\/para>%g" | sed "s%FIXME%\nFIXME%g" | - sed '1,4d' > $TARGET + # cut off first line: + sed '1d' > $TARGET if [ "$(grep -v FIXMEs $TARGET |grep FIXME)" != "" ] ; then echo "----------------------------------" >> $TMPFILE echo ${url}${i} >> $TMPFILE @@ -79,9 +80,10 @@ # (replace with the second match of the regular expression) sed -i "s/\(.*\)\/\(.*\)/\2/" id -# add id= to <section>s +# add id= to <section>s and a linebreak at the end for i in `cat id` ; do sed -i "0,/<section>/ s/<section>/<section id=\"$i\">/" ${i}.xml + sed -i "$ s#>#>\n#" ${i}.xml done # paste it together @@ -104,8 +106,10 @@ # make it a docbook article again sed -i "1,/</ s#<#<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE article PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\" \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\"><article><articleinfo><title>$DEBIAN_EDU_DOC_TITLE</title></articleinfo>\n<#" $xmlfile sed -i "$ s#>#>\n</article>#" $xmlfile -# remove the first two empty lines -sed -i "1,2d" $xmlfile + +# remove the first empty line +sed -i "1d" $xmlfile + # clean it further TMPFILE2=$(mktemp) xmllint $xmlfile > $TMPFILE2