# Print, for every *html file in the current directory, the file name
# followed by the text of its "Installed program(s):" entry on one line,
# then a blank line. Relies on a sed that accepts -r and "\n" in the
# replacement text (GNU sed does).
for file in *html; do
  # With no matching files the glob stays as the literal "*html"; skip it
  # rather than printing it and running sed on a nonexistent file.
  [ -e "$file" ] || continue
  # File name first, without a newline, so the extracted text continues on
  # the same line. printf avoids echo's option and word-splitting pitfalls.
  printf '%s' "$file"
  # Stage 1: keep only the lines from the "Installed program(s):" label
  #          through the following line that contains </span>.
  # Stage 2: put every tag on a line of its own.
  # Stage 3: drop the tag-only lines.
  # Stage 4: join the remainder into one line, squeezing runs of spaces.
  # The file is fed through stdin so a name starting with "-" is never
  # taken for an option.
  sed -n -r '/Installed [Pp]rograms?:/,/<\/span>/p' < "$file" \
    | sed -e 's/</\n</g' -e 's/>/>\n/g' \
    | grep -v '^<.*>$' \
    | tr -s '\n' ' '
  echo; echo
done | sed 's/(.[^(]*)//g'  # remove parenthesised notes such as "(link to xxx)"

---

To process a single HTML page (here diffutils.html), run:

# Extract the "Installed program(s):" entry of diffutils.html as one line:
# select the block up to </span>, isolate the tags on their own lines,
# discard those tag lines, then fold the rest together with single spaces.
sed -r -n '/Installed [Pp]rograms?:/,/<\/span>/p' diffutils.html \
  | sed -e 's/</\n</g' -e 's/>/>\n/g' \
  | grep -v '^<.*>$' \
  | tr -s '\n' ' '
# To drop "(link to xxx)" notes, append:  | sed 's/(.[^(]*)//g'

Output:
Installed programs: cmp, diff, diff3, and sdiff 



-- 
http://lists.linuxfromscratch.org/listinfo/lfs-dev
FAQ: http://www.linuxfromscratch.org/faq/
Unsubscribe: See the above information page

Reply via email to