I'm part of upstream for catdoc, so, since Nick Bane has suddenly been diverted by RealLife and cannot prepare the changes for sponsoring, I've made the changes upstream myself and NMU'd the Debian package.
The changes principally involve fixing tarball.sh, which is the current hacky way that the catdoc orig tarball is created, and then applying the patch for the buffer overflow. (The next upstream release of catdoc is scheduled to use cmake, which will remove all of these historical autotools problems.) The upstream changes are confined to removing the .pc directory, updating the package version string and applying the patch directly to the upstream source. I'll file the unblock request with the attached diff once catdoc has been in unstable for a few days. -- Neil Williams ============= http://www.linux.codehelp.co.uk/
diffstat for catdoc-0.94.3 catdoc-0.94.4 .pc/.quilt_patches | 1 .pc/.quilt_series | 1 .pc/.version | 1 .pc/applied-patches | 1 .pc/debian-changes-0.94.3-1/doc/catdoc.txt | 258 ---------------------------- .pc/debian-changes-0.94.3-1/doc/catppt.txt | 51 ----- .pc/debian-changes-0.94.3-1/doc/xls2csv.txt | 85 --------- configure | 2 configure.in | 2 debian/changelog | 12 + doc/catdoc.1 | 2 doc/catppt.1 | 2 doc/wordview.1 | 2 doc/xls2csv.1 | 2 src/xlsparse.c | 4 tarball.sh | 18 + 16 files changed, 34 insertions(+), 410 deletions(-) diff -Nru catdoc-0.94.3/configure catdoc-0.94.4/configure --- catdoc-0.94.3/configure 2012-06-10 14:02:08.000000000 +0100 +++ catdoc-0.94.4/configure 2012-12-03 18:01:26.000000000 +0000 @@ -541,7 +541,7 @@ fi -catdoc_version=0.94.2 +catdoc_version=0.94.4 # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 diff -Nru catdoc-0.94.3/configure.in catdoc-0.94.4/configure.in --- catdoc-0.94.3/configure.in 2012-06-10 12:35:25.000000000 +0100 +++ catdoc-0.94.4/configure.in 2012-12-03 18:01:31.000000000 +0000 @@ -1,6 +1,6 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT(acconfig.h) -catdoc_version=0.94.2 +catdoc_version=0.94.4 dnl Checks for programs. AC_PROG_CC case ${CC} in diff -Nru catdoc-0.94.3/debian/changelog catdoc-0.94.4/debian/changelog --- catdoc-0.94.3/debian/changelog 2012-06-10 13:51:32.000000000 +0100 +++ catdoc-0.94.4/debian/changelog 2012-12-03 18:50:42.000000000 +0000 @@ -1,3 +1,15 @@ +catdoc (0.94.4-1.1) unstable; urgency=low + + * Non-maintainer upload. + * New upstream release to remove .pc subdirectory from + the orig tarball (Closes: #692073). Includes updating + version strings in generated manpages. + * Remove extra ';' in src/xlsparse.c which turned for loop in + xlsparse into a buffer overflow (Closes: #692076), applies + patch by Olly Betts <o...@survex.com>. 
+ + -- Neil Williams <codeh...@debian.org> Mon, 03 Dec 2012 18:22:47 +0000 + catdoc (0.94.3-1) unstable; urgency=low * Declare new upstream release diff -Nru catdoc-0.94.3/doc/catdoc.1 catdoc-0.94.4/doc/catdoc.1 --- catdoc-0.94.3/doc/catdoc.1 2012-06-10 14:04:16.000000000 +0100 +++ catdoc-0.94.4/doc/catdoc.1 2012-12-03 18:54:22.000000000 +0000 @@ -1,4 +1,4 @@ -.TH catdoc 1 "Version 0.94.2" "MS-Word reader" +.TH catdoc 1 "Version 0.94.4" "MS-Word reader" .SH NAME catdoc \- reads MS-Word file and puts its content as plain text on standard output .SH SYNOPSIS diff -Nru catdoc-0.94.3/doc/catppt.1 catdoc-0.94.4/doc/catppt.1 --- catdoc-0.94.3/doc/catppt.1 2012-06-10 14:04:16.000000000 +0100 +++ catdoc-0.94.4/doc/catppt.1 2012-12-03 18:54:22.000000000 +0000 @@ -1,4 +1,4 @@ -.TH ppt2text 1 "Version 0.94.2" "MS-PowerPoint reader" +.TH ppt2text 1 "Version 0.94.4" "MS-PowerPoint reader" .SH NAME catppt \- reads MS-PowerPoint file and puts its content on standard output .SH SYNOPSIS diff -Nru catdoc-0.94.3/doc/wordview.1 catdoc-0.94.4/doc/wordview.1 --- catdoc-0.94.3/doc/wordview.1 2012-06-10 14:04:16.000000000 +0100 +++ catdoc-0.94.4/doc/wordview.1 2012-12-03 18:54:22.000000000 +0000 @@ -1,4 +1,4 @@ -.TH wordview 1 "Version 0.94.2" "MS-Word reader" +.TH wordview 1 "Version 0.94.4" "MS-Word reader" .SH NAME wordview \- displays text contained in MS-Word file in X window diff -Nru catdoc-0.94.3/doc/xls2csv.1 catdoc-0.94.4/doc/xls2csv.1 --- catdoc-0.94.3/doc/xls2csv.1 2012-06-10 14:04:16.000000000 +0100 +++ catdoc-0.94.4/doc/xls2csv.1 2012-12-03 18:54:22.000000000 +0000 @@ -1,4 +1,4 @@ -.TH xls2csv 1 "Version 0.94.2" "MS-Word reader" +.TH xls2csv 1 "Version 0.94.4" "MS-Word reader" .SH NAME xls2csv \- reads MS-Excel file and puts its content as comma-separated data on standard output .SH SYNOPSIS diff -Nru catdoc-0.94.3/.pc/applied-patches catdoc-0.94.4/.pc/applied-patches --- catdoc-0.94.3/.pc/applied-patches 2012-06-10 14:26:51.000000000 +0100 +++ 
catdoc-0.94.4/.pc/applied-patches 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -debian-changes-0.94.3-1 diff -Nru catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catdoc.txt catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catdoc.txt --- catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catdoc.txt 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catdoc.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,258 +0,0 @@ -catdoc(1) catdoc(1) - - - -NAME - catdoc - reads MS-Word file and puts its content as plain text on stanâ - dard output - -SYNOPSIS - catdoc [-vlu8btawxV] [-m number] [ -s charset] [ -d charset] [ -f outâ - put-format] file - - -DESCRIPTION - catdoc behaves much like cat(1) but it reads MS-Word file and produces - human-readable text on standard output. Optionally it can use latex(1) - escape sequences for characters which have special meaning for LaTeX. - It also makes some effort to recognize MS-Word tables, although it - never tries to write correct headers for LaTeX tabular environment. - Additional output formats, such is HTML can be easily defined. - - catdoc doesn't attempt to extract formatting information other than - tables from MS-Word document, so different output modes means mainly - that different characters should be escaped and different ways used to - represent characters, missing from output charset. See CHARACTER SUBâ - STITUTION below - - - catdoc uses internal unicode(7) representation of text, so it is able - to convert texts when charset in source document doesn't match charset - on target system. See CHARACTER SETS below. - - If no file names supplied, catdoc processes its standard input unless - it is terminal. It is unlikely that somebody could type Word document - from keyboard, so if catdoc invoked without arguments and stdin is not - redirected, it prints brief usage message and exits. Processing of - standard input (even among other files) can be forced using dash '-' as - file name. 
- - By default, catdoc wraps lines which are more than 72 chars long and - separates paragraphs by blank lines. This behavior can be turned of by - -w switch. In wide mode catdoc prints each paragraph as one long line, - suitable for import into word processors which perform word wrapping. - - - -OPTIONS - -a - shortcut for -f ascii. Produces ASCII text as output. Sepaâ - rates table columns with TAB - - -b - process broken MS-Word file. Normally, catdoc checks if first - 8 bytes of file is Microsoft OLE signature. If so, it processes - file, otherwise it just copies it to stdin. It is intended to - use catdoc as filter for viewing all files with .doc extension. - - -dcharset - - specifies destination charset name. Charset file has format - described in CHARACTER SETS below and should have .txt extenâ - sion and reside in catdoc library directory ( ${exec_preâ - fix}/lib/catdoc). By default, current locale charset is used if - langinfo support compiled in. - - -fformat - - specifies output format as described in CHARACTER SUBSTITUâ - TION below. catdoc comes with two output formats - ascii and - tex. You can add your own if you wish. - - -l Causes catdoc to list names of available charsets to the stdout - and exit successfully. - - -mnumber - Specifies right margin for text (default 72). -m 0 is equivaâ - lent to -w - - -scharset - Specifies source charset. (one used in Word document), if Word - document doesn't contain UTF-16 text. When reading rtf docuâ - ments, it is typically not necessary, because rtf documents - contain ansicpg specification. But it can be set wrong by Word - (I've seen RTF documents on Russian, where cp1252 was speciâ - fied). In this case this option would take precedence over - charset, specified in the document. But source_charset stateâ - ment in the configuration file have less priority than charset - in the document. 
- - -t - shortcut for -f tex - converts all printable chars, which have special meaning for - LaTeX(1) into appropriate control sequences. Separates table - columns by &. - - -u - declares that Word document contain UNICODE (UTF-16) - representation of text (as some Word-97 documents). If catdoc - fails to correct Word document with default charset, try - this option. - - -8 - declares is Word document is 8 bit. Just in case that catdoc - recognizes file format incorrectly. - - -w disables word wrapping. By default catdoc output is split into - lines not longer than 72 (or number, specified by -m option) - characters and paragraphs are separated by blank line. With - this option each paragraph is one long line. - - -x causes catdoc to output unknown UNICODE character as \xNNNN, - instead of question marks. - - -v causes catdoc to print some useless information about word docâ - ument structure to stdout before actual start of text. - - -V outputs catdoc version - - -CHARACTER SETS - When processing MS-Word file catdoc uses information about two characâ - ter sets, typically different - - input and output. They are stored in plain text files in catdoc - data directory. Character set files should contain two whitespace-sepaâ - rated hexadecimal numbers - 8-bit code in character set and 16-bit Uniâ - code code. Anything from hash mark to end of line is ignored, as well - as blank lines. - - catdoc distribution includes some of these character sets. Additional - character set definitions, directly usable by catdoc can be obtained - from ftp.unicode.org. Charset files have .txt suffix, which shouldn't - be specified in command-line or configuration files. - - Note that catdoc is distributed with Cyrillic charsets as default. If - you are not Russian, you probably don't want it, an should reconfigure - catdoc at compile time or in runtime configuration file. 
- - When dealing with documents with charsets other than default, remember - that Microsoft never uses ISO charsets. While letters in, say cp1252 - are at the same position as in ISO-8859-1, some punctuation signs would - be lost, if you specify ISO-8859-1 as input charset. If you use cp1252, - catdoc would deal with those signs as described in CHARACTER SUBSTITUâ - TION below. - - -CHARACTER SUBSTITUTION - catdoc converts MS-Word file into following internal Unicode represenâ - tation: - - 1. Paragraphs are separated by ASCII Line Feed symbol (0x000A) - - 2. Table cells within row are separated by ASCII Field Separator symbol - (0x001C) - - 3. Table rows are separated by ASCII Record Separator (0x001E) - - 4. All printable characters, including whitespace are represented with - their - respective UNICODE codes. - - This UNICODE representation is subsequently converted into 8-bit text - in target character set using following four-step algorithm: - - 1. List of special characters is searched for given Unicode character. - If found, then appropriate multi-character sequence is output - instead of character. - - 2. If there is an equivalent in target character set, it is output. - - 3. Otherwise, replacement list is searched and, if there is multi-charâ - acter - substitution for this UNICODE char, it is output. - - 4. If all above fails, "Unknown char" symbol (question mark) is output. - - Lists of special characters and list of substitution are character set- - independent, because special chars should be escaped regardless of - their existence in target character set (usually, they are parts of - US-ASCII, and therefore exist in any character set) and replacement - list is searched only for those characters, which are not found in tarâ - get character set. - - These lists are stored in catdoc data directory in files with prefix of - format name. 
These files have following format: - - Each line can be either comment (starting with hash mark) or contain - hexadecimal UNICODE value, separated by whitespace from string, which - would be substituted instead of it. If string contain no whitespace it - can be used as is, otherwise it should be enclosed in single or double - quotes. Usual backslash sequences like '\n','\t' can be used in these - string. - - - -RUNTIME CONFIGURATION - Upon startup catdoc reads its system-wide configuration file /etc/catâ - docrc and then user-specific configuration file ${HOME}/.catdocrc. - - These files can contain following directives: - - source_charset = charset-name - Sets default source charset, which would be used if no -s - option specified. Consult configuration of nearby windows workâ - station to find one you need. - - target_charset = charset-name - Sets default output charset. You probably know, which one you - use. - - charset_path = directory-list - colon-separated list of directories, which are searched for - charset files. This allows you to install additional charsets - in your home directory. If first directory component of path - is ~ it is replaced by contents of HOME environment variable. - On MS-DOS platform, if directory name starts with %s, it is - replaced with directory of executable file. Empty element in - list (i.e. two consequitve colons) is considered current direcâ - tory. - - map_path = directory-list - colon-separated list of directories, which are searched for - special character map and replacement map. Same substitution - rules as in charset_path are applied. - - format = format name - Output format which would be used by default. catdoc comes - with two formats - ascii and tex but nothing prevents you from - writing your own format (set two map files - special character - map and replacement map). 
- - unknown_char = character specification - sets character to output instead of unknown Unicode character - (default '?') Character specification can have one of two form - - character enclosed in single quotes or hexadecimal code. - - use_locale =(yes|no) - Enables or disables automatic selection of output charset - (default yes), - based on system locale settings (if enabled at compile time). - If automatic detection is enabled, than output charset settings - in the configuration files (but not in the command line) are - ignored, and current system locale charset is used instead. - There are no automatic choice of input charset, based of locale - language, because most modern Word files (since Word 97) are - Unicode anyway - - -BUGS - Doesn't handle fast-saves properly. Prints footnotes as separate paraâ - graphs at the end of file, instead of producing correct LaTeX commands. - Cannot distinguish between empty table cell and end of table row. - - - - -SEE ALSO - xls2csv(1), cat(1), strings(1), utf(4), unicode(7) - - -AUTHOR - V.B.Wagner <vi...@45.free.net> - - - -MS-Word reader Version 0.94.2 catdoc(1) diff -Nru catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catppt.txt catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catppt.txt --- catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catppt.txt 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catppt.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,51 +0,0 @@ -ppt2text(1) ppt2text(1) - - - -NAME - catppt - reads MS-PowerPoint file and puts its content on standard outâ - put - -SYNOPSIS - catppt [-lV] [-b string ] [-s charset ] [-d charset ] files - - -DESCRIPTION - catppt reads MS-PowerPoint presentations and dumps its content to stdâ - out. - -OPTIONS - -l list known charsets and exit successfully - - -bstring - slides break string. This string (by default - formfeed) would - be output at the end of each slide page. - - - -dcharset` - - specifies destination charset name. 
Charset file has format - described in CHARACTER SETS section of catdoc(1) manual page. - By default, current locale charset would be used if langinfo - support was enabled at the compile time. - - - -scharset - - specifies source charset. Typically, PowerPoint files use - UNICODE strings with known charsets, but for some reason you - may wish to override it. - - - -V outputs version number - - -SEE ALSO - cat(1), catdoc(1), xls2csv(1), strings(1), utf(4), unicode(4) - - -AUTHOR - Alex Ott <alex...@gmail.com> - - - - -MS-PowerPoint reader Version 0.94.2 ppt2text(1) diff -Nru catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt --- catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,85 +0,0 @@ -xls2csv(1) xls2csv(1) - - - -NAME - xls2csv - reads MS-Excel file and puts its content as comma-separated - data on standard output - -SYNOPSIS - xls2csv [-xlV] [-f format ] [-b string ] [-s charset ] [-d charset - ] [-q number ] [-c char] files - - -DESCRIPTION - xls2csv reads MS-Excel spreadsheet and dumps its content as comma-sepaâ - rated values to stdout. Numbers are printed without delimiters, strings - are enclosed in the double quotes. Double-quotes inside string are douâ - bled. - -OPTIONS - -x print unknown Unicode chars as \xNNNN, rather than as question - marks - - -l list known charsets and exit successfully - - -cchar cell separator char. By default - comma. - - -bstring - sheet break string. This string (by default - formfeed) would - be output at the end of each workbook page. This string is - printed after page starting at start of line, but no linefeed - would be automatically added at the end of string. 
Include new‐ line at the ent of sheet separator if you want it to appear on - separate line by itself - - -gnumber number of decimal digits in the numbers. By default maximal - double precision (system-dependent macro DBL_DIG) is used. - - -qnumber - set quote mode. In quote mode 0 cell contents is never quoted. - In quote mode 1 only strings which contain spaces, double - quotes or commas are quoted. In quote mode 2 (default) all - cells with type string are quoted. In quote mode 3 all cells - are quoted. - - - -dcharset` - - specifies destination charset name. Charset file has format - described in CHARACTER SETS section of catdoc(1) manual page. - By default, current locale charset would be used if langinfo - support was enabled at the compile time. - - - -scharset - - specifies source charset. Typically, Excel files have CODE - PAGE record, which denotes input charset, but for some reason - you may wish to override it. - - -fformat - - specifies date/time format to use for output of all Excel - date and time values. If this option is not specified, for‐ - mat, specified in the spreadsheet is used. On POSIX system any - format, allowed by strftime(3) can be used as value of this - option. Under MS-DOS xls2csv implements limited set of strftime - formats, namely m, d, y, Y, b, l, p, H, M, S.
- - - -V outputs version number - - -FILES - ${HOME}/.catdocrc, catdoc charset files and substitution map files (see - catdoc(1) manual page for details, - - -SEE ALSO - cat(1), catdoc(1), strings(1), utf8(7), unicode(7) - - -AUTHOR - V.B.Wagner <vi...@45.free.net>, based on biffview by David Rysdam - - - -MS-Word reader Version 0.94.2 xls2csv(1) diff -Nru catdoc-0.94.3/.pc/.quilt_patches catdoc-0.94.4/.pc/.quilt_patches --- catdoc-0.94.3/.pc/.quilt_patches 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/.quilt_patches 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -debian/patches diff -Nru catdoc-0.94.3/.pc/.quilt_series catdoc-0.94.4/.pc/.quilt_series --- catdoc-0.94.3/.pc/.quilt_series 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/.quilt_series 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -series diff -Nru catdoc-0.94.3/.pc/.version catdoc-0.94.4/.pc/.version --- catdoc-0.94.3/.pc/.version 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/.version 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -2 diff -Nru catdoc-0.94.3/src/xlsparse.c catdoc-0.94.4/src/xlsparse.c --- catdoc-0.94.3/src/xlsparse.c 2012-06-10 13:47:37.000000000 +0100 +++ catdoc-0.94.4/src/xlsparse.c 2012-12-03 18:03:52.000000000 +0000 @@ -589,8 +589,8 @@ void CleanUpFormatIdxUsed() { int i; - for (i=0;i<NUMOFDATEFORMATS; i++); - FormatIdxUsed[i]=0; + for (i=0;i<NUMOFDATEFORMATS; i++) + FormatIdxUsed[i]=0; } /* diff -Nru catdoc-0.94.3/tarball.sh catdoc-0.94.4/tarball.sh --- catdoc-0.94.3/tarball.sh 2012-06-10 14:02:08.000000000 +0100 +++ catdoc-0.94.4/tarball.sh 2012-12-03 18:49:06.000000000 +0000 @@ -5,8 +5,18 @@ set -e -debclean -cd ../ -tar -czf catdoc-0.94.3.tar.gz ./catdoc-0.94.3 --exclude=debian --exclude=.svn - ln -sf catdoc-0.94.3.tar.gz catdoc_0.94.3.orig.tar.gz +test ! -d .pc + +VERSION=0.94.4 +#debclean +find . 
-name '*.o' -delete +rm -f config.cache config.log config.status Makefile src/catppt src/xls2csv +rm -f doc/Makefile charsets/Makefile build-stamp install-stamp src/catdoc src/wordview src/Makefile +cd ../ +if [ -h catdoc-${VERSION} ]; then + rm catdoc-${VERSION} +fi +ln -s ./wheezy/ catdoc-${VERSION} +tar -czf catdoc-${VERSION}.tar.gz ./catdoc-${VERSION}/* --exclude=debian --exclude=.svn +ln -sf catdoc-${VERSION}.tar.gz catdoc_${VERSION}.orig.tar.gz
pgpOYR1fL3Lc5.pgp
Description: PGP signature