# Print, for every file in the current directory whose name ends in "html",
# the file name followed by the text of its "Installed program(s):" entry,
# with the HTML tags stripped and the entry collapsed onto a single line.
# Every file's line is followed by one blank line.  A file without such an
# entry yields just its name.  Nothing is printed when no file matches.
#
# The final sed removes parenthesised notes such as "(link to xxx)"; it
# filters the whole loop output, so it applies to the file names as well.
#
# Requires GNU sed (-r, and "\n" in the replacement text).
list_installed_programs() {
  local file
  for file in *html; do
    # An unmatched glob leaves the literal pattern in $file; skip it.
    [ -e "$file" ] || continue
    printf '%s' "$file"
    # 1. keep the lines from the heading through the closing </span>
    # 2. put every tag on a line of its own
    # 3. drop the tag-only lines, then join the rest with single spaces
    sed -n -r '/Installed [Pp]rograms?:/,/<\/span>/p' -- "$file" |
      sed -e 's/</\n</g' -e 's/>/>\n/g' |
      grep -v '^<.*>$' |
      tr -s '\n' ' '
    printf '\n\n'
  done | sed 's/(.[^(]*)//g'
}

list_installed_programs
---
For a single HTML page execute:
# Extract the "Installed program(s):" entry of one page as a single line:
# select the lines from the heading through the closing </span>, break the
# markup so each tag sits on its own line, discard the tag-only lines and
# join the remaining text with single spaces.
sed -n -r '/Installed [Pp]rograms?:/,/<\/span>/ {
  s/</\n</g
  s/>/>\n/g
  p
}' diffutils.html |
  grep -v '^<.*>$' |
  tr -s '\n' ' '
# Pipe output to sed 's/(.[^(]*)//g' to remove "(link to xxx)"
Output:
Installed programs: cmp, diff, diff3, and sdiff
--
http://lists.linuxfromscratch.org/listinfo/lfs-dev
FAQ: http://www.linuxfromscratch.org/faq/
Unsubscribe: See the above information page