CMakeLists.txt | 1 goo/Makefile.am | 2 goo/NetPBMWriter.cc | 84 ++++++++ goo/NetPBMWriter.h | 52 +++++ goo/PNGWriter.cc | 4 goo/PNGWriter.h | 2 poppler/JBIG2Stream.h | 1 poppler/Stream.h | 5 utils/HtmlOutputDev.cc | 31 +-- utils/ImageOutputDev.cc | 463 +++++++++++++++++++++++++++++++++--------------- utils/ImageOutputDev.h | 44 +++- utils/pdfimages.1 | 84 +++++++- utils/pdfimages.cc | 44 ++++ utils/pdftocairo.cc | 2 14 files changed, 640 insertions(+), 179 deletions(-)
New commits: commit af4f2b775946815b572622bf4c4d42ad3aea1141 Author: Adrian Johnson <[email protected]> Date: Sat Aug 24 21:25:51 2013 +0930 pdfimages: Add -all option to write all image in their native format diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index 5a87573..a841ad7 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -87,6 +87,10 @@ Encoding uses 0 for black and 1 for white Input data fills from most significant bit to least significant bit. .RE .TP +.B \-all +Write JPEG, JPEG2000, JBIG2, and CCITT images in their native format. All other images are written as PNG files. +This is equivalent to specifying the options \-png \-j \-jp2 \-jbig2 \-ccitt. +.TP .B \-list Instead of writing the images, list the images along with various information for each image. Do not specify an .IR image-root diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index 86ba3b7..ae05c8b 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -56,6 +56,7 @@ static GBool dumpJPEG = gFalse; static GBool dumpJP2 = gFalse; static GBool dumpJBIG2 = gFalse; static GBool dumpCCITT = gFalse; +static GBool allFormats = gFalse; static GBool pageNames = gFalse; static char ownerPassword[33] = "\001"; static char userPassword[33] = "\001"; @@ -84,6 +85,8 @@ static const ArgDesc argDesc[] = { "write JBIG2 images as JBIG2 files"}, {"-ccitt", argFlag, &dumpCCITT, 0, "write CCITT images as CCITT files"}, + {"-all", argFlag, &allFormats, 0, + "equivalent to -png -j -jp2 -jbig2 -ccitt"}, {"-list", argFlag, &listImages, 0, "print list of images instead of saving"}, {"-opw", argString, ownerPassword, sizeof(ownerPassword), @@ -189,12 +192,20 @@ int main(int argc, char *argv[]) { // write image files imgOut = new ImageOutputDev(imgRoot, pageNames, listImages); if (imgOut->isOk()) { - imgOut->enablePNG(enablePNG); - imgOut->enableTiff(enableTiff); - imgOut->enableJpeg(dumpJPEG); - imgOut->enableJpeg2000(dumpJP2); - imgOut->enableJBig2(dumpJBIG2); - imgOut->enableCCITT(dumpCCITT); + if 
(allFormats) { + imgOut->enablePNG(gTrue); + imgOut->enableJpeg(gTrue); + imgOut->enableJpeg2000(gTrue); + imgOut->enableJBig2(gTrue); + imgOut->enableCCITT(gTrue); + } else { + imgOut->enablePNG(enablePNG); + imgOut->enableTiff(enableTiff); + imgOut->enableJpeg(dumpJPEG); + imgOut->enableJpeg2000(dumpJP2); + imgOut->enableJBig2(dumpJBIG2); + imgOut->enableCCITT(dumpCCITT); + } doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, gTrue, gFalse, gFalse); } commit 25e96b6ddbbe54a75ddb97d2e235c1bd6033fe79 Author: Adrian Johnson <[email protected]> Date: Wed Aug 21 22:22:28 2013 +0930 pdfimages: support ccitt output diff --git a/poppler/Stream.h b/poppler/Stream.h index 9b40fd1..00b2925 100644 --- a/poppler/Stream.h +++ b/poppler/Stream.h @@ -797,6 +797,11 @@ public: virtual void unfilteredReset (); + int getEncoding() { return encoding; } + GBool getEndOfLine() { return endOfLine; } + int getColumns() { return columns; } + GBool getBlackIs1() { return black; } + private: void ccittReset(GBool unfiltered); diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index ebdd644..ae7d309 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -454,6 +454,41 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str, // dump JBIG2 embedded file writeRawImage(str, "jb2e"); + } else if (dumpCCITT && str->getKind() == strCCITTFax && !inlineImg) { + // write CCITT parameters + CCITTFaxStream *ccittStr = static_cast<CCITTFaxStream *>(str); + FILE *f; + setFilename("params"); + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } + if (ccittStr->getEncoding() < 0) + fprintf(f, "-4 "); + else if (ccittStr->getEncoding() == 0) + fprintf(f, "-1 "); + else + fprintf(f, "-2 "); + + if (ccittStr->getEndOfLine()) + fprintf(f, "-A "); + else + fprintf(f, "-P "); + + fprintf(f, "-X %d ", ccittStr->getColumns()); + + if (ccittStr->getBlackIs1()) + fprintf(f, "-W "); + else + 
fprintf(f, "-B "); + + fprintf(f, "-M\n"); // PDF uses MSB first + + fclose(f); + + // dump CCITT file + writeRawImage(str, "ccitt"); + } else if (outputPNG) { // output in PNG format diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h index 75063a2..8d0785c 100644 --- a/utils/ImageOutputDev.h +++ b/utils/ImageOutputDev.h @@ -85,6 +85,9 @@ public: // Use JBIG2 format for JBIG2 files void enableJBig2(GBool jbig2) { dumpJBIG2 = jbig2; } + // Use CCITT format for CCITT files + void enableCCITT(GBool ccitt) { dumpCCITT = ccitt; } + // Check if file was successfully created. virtual GBool isOk() { return ok; } @@ -162,6 +165,7 @@ private: GBool dumpJPEG; // set to dump native JPEG files GBool dumpJP2; // set to dump native JPEG2000 files GBool dumpJBIG2; // set to dump native JBIG2 files + GBool dumpCCITT; // set to dump native CCITT files GBool outputPNG; // set to output in PNG format GBool outputTiff; // set to output in TIFF format GBool pageNames; // set to include page number in file names diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index 23530c5..5a87573 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -50,12 +50,49 @@ Write images in JPEG2000 format as JP2 files instead of the default format. The .B \-jbig2 Write images in JBIG2 format as JBIG2 files instead of the default format. JBIG2 data in PDF is of the embedded type. The embedded type of JBIG2 has an optional separate file containing global data. The embedded data is written with the extension .jb2e and the global data (if available) will be written to the same image number with the extension .jb2g. The content of both these files is indentical to the JBIG2 data in the PDF. .TP +.B \-ccitt +Write images in CCITT format as CCITT files instead of the default +format. The CCITT file is identical to the CCITT data stored in the +PDF. PDF files contain additional parameters specifying +how to decode the CCITT data.
These parameters are translated to +fax2tiff input options and written to a .params file with the same image +number. The parameters are: +.RS +.TP +.B \-1 +1D Group 3 encoding +.TP +.B \-2 +2D Group 3 encoding +.TP +.B \-4 +Group 4 encoding +.TP +.B \-A +Beginning of line is aligned on a byte boundary +.TP +.B \-P +Beginning of line is not aligned on a byte boundary +.TP +.B \-X n +The image width in pixels +.TP +.B \-W +Encoding uses 1 for black and 0 for white +.TP +.B \-B +Encoding uses 0 for black and 1 for white +.TP +.B \-M +Input data fills from most significant bit to least significant bit. +.RE +.TP .B \-list Instead of writing the images, list the images along with various information for each image. Do not specify an .IR image-root with this option. .IP -The following information is listed for each font: +The following information is listed for each image: .RS .TP .B page diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index 48d12c4..86ba3b7 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -55,6 +55,7 @@ static GBool enableTiff = gFalse; static GBool dumpJPEG = gFalse; static GBool dumpJP2 = gFalse; static GBool dumpJBIG2 = gFalse; +static GBool dumpCCITT = gFalse; static GBool pageNames = gFalse; static char ownerPassword[33] = "\001"; static char userPassword[33] = "\001"; @@ -81,6 +82,8 @@ static const ArgDesc argDesc[] = { "write JPEG2000 images as JP2 files"}, {"-jbig2", argFlag, &dumpJBIG2, 0, "write JBIG2 images as JBIG2 files"}, + {"-ccitt", argFlag, &dumpCCITT, 0, + "write CCITT images as CCITT files"}, {"-list", argFlag, &listImages, 0, "print list of images instead of saving"}, {"-opw", argString, ownerPassword, sizeof(ownerPassword), @@ -191,6 +194,7 @@ int main(int argc, char *argv[]) { imgOut->enableJpeg(dumpJPEG); imgOut->enableJpeg2000(dumpJP2); imgOut->enableJBig2(dumpJBIG2); + imgOut->enableCCITT(dumpCCITT); doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, gTrue, gFalse, gFalse); } commit 
086413263cb63a24d9492fbe534fdcc34b45951a Author: Adrian Johnson <[email protected]> Date: Sun Aug 18 20:37:01 2013 +0930 pdfimages: support JBIG2 output diff --git a/poppler/JBIG2Stream.h b/poppler/JBIG2Stream.h index be1b3bd..0ee2518 100644 --- a/poppler/JBIG2Stream.h +++ b/poppler/JBIG2Stream.h @@ -56,6 +56,7 @@ public: virtual int lookChar(); virtual GooString *getPSFilter(int psLevel, const char *indent); virtual GBool isBinary(GBool last = gTrue); + virtual Object *getGlobalsStream() { return &globalsStream; } private: virtual GBool hasGetChars() { return true; } diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index b92acaa..ebdd644 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -48,6 +48,7 @@ #include "GfxState.h" #include "Object.h" #include "Stream.h" +#include "JBIG2Stream.h" #include "ImageOutputDev.h" ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool listImagesA) { @@ -429,6 +430,30 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str, // dump JPEG2000 file writeRawImage(str, "jp2"); + } else if (dumpJBIG2 && str->getKind() == strJBIG2 && !inlineImg) { + // dump JBIG2 globals stream if available + JBIG2Stream *jb2Str = static_cast<JBIG2Stream *>(str); + Object *globals = jb2Str->getGlobalsStream(); + if (globals->isStream()) { + FILE *f; + int c; + Stream *str = globals->getStream(); + + setFilename("jb2g"); + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } + str->reset(); + while ((c = str->getChar()) != EOF) + fputc(c, f); + str->close(); + fclose(f); + } + + // dump JBIG2 embedded file + writeRawImage(str, "jb2e"); + } else if (outputPNG) { // output in PNG format diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h index b674f05..75063a2 100644 --- a/utils/ImageOutputDev.h +++ b/utils/ImageOutputDev.h @@ -82,6 +82,9 @@ public: // Use Jpeg2000 format for Jpeg2000 files void enableJpeg2000(GBool 
jp2) { dumpJP2 = jp2; } + // Use JBIG2 format for JBIG2 files + void enableJBig2(GBool jbig2) { dumpJBIG2 = jbig2; } + // Check if file was successfully created. virtual GBool isOk() { return ok; } @@ -158,6 +161,7 @@ private: GBool listImages; // list images instead of dumping GBool dumpJPEG; // set to dump native JPEG files GBool dumpJP2; // set to dump native JPEG2000 files + GBool dumpJBIG2; // set to dump native JBIG2 files GBool outputPNG; // set to output in PNG format GBool outputTiff; // set to output in TIFF format GBool pageNames; // set to include page number in file names diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index 79b3aad..23530c5 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -11,7 +11,7 @@ pdfimages \- Portable Document Format (PDF) image extractor .B Pdfimages saves images from a Portable Document Format (PDF) file as Portable Pixmap (PPM), Portable Bitmap (PBM), Portable Network Graphics (PNG), -Tagged Image File Format (TIFF), JPEG, or JPEG2000 files. +Tagged Image File Format (TIFF), JPEG, JPEG2000, or JBIG2 files. .PP Pdfimages reads the PDF file .IR PDF-file , @@ -21,12 +21,12 @@ where .I nnn is the image number and .I xxx -is the image type (.ppm, .pbm, .png, .tif, .jpg, or jp2). +is the image type (.ppm, .pbm, .png, .tif, .jpg, jp2, jb2e, or jb2g). .PP The default output format is PBM (for monochrome images) or PPM for non-monochrome. The -\-png or \-tiff options change to default output to PNG or TIFF respectively. In addition the \-j and -\-jp2 options will cause JPEG and JPEG2000, respectively, images in the PDF file to be written in their -native format. +\-png or \-tiff options change to default output to PNG or TIFF respectively. In addition the \-j, +\-jp2, and \-jbig2 options will cause JPEG, JPEG2000, and JBIG2, respectively, images in the PDF file +to be written in their native format. .SH OPTIONS .TP .BI \-f " number" @@ -47,6 +47,9 @@ Write images in JPEG format as JPEG files instead of the default format. 
The JPE .B \-jp2 Write images in JPEG2000 format as JP2 files instead of the default format. The JP2 file is identical to the JPEG2000 data stored in the PDF. .TP +.B \-jbig2 +Write images in JBIG2 format as JBIG2 files instead of the default format. JBIG2 data in PDF is of the embedded type. The embedded type of JBIG2 has an optional separate file containing global data. The embedded data is written with the extension .jb2e and the global data (if available) will be written to the same image number with the extension .jb2g. The content of both these files is indentical to the JBIG2 data in the PDF. +.TP .B \-list Instead of writing the images, list the images along with various information for each image. Do not specify an .IR image-root diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index 0d2a420..48d12c4 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -54,6 +54,7 @@ static GBool enablePNG = gFalse; static GBool enableTiff = gFalse; static GBool dumpJPEG = gFalse; static GBool dumpJP2 = gFalse; +static GBool dumpJBIG2 = gFalse; static GBool pageNames = gFalse; static char ownerPassword[33] = "\001"; static char userPassword[33] = "\001"; @@ -78,6 +79,8 @@ static const ArgDesc argDesc[] = { "write JPEG images as JPEG files"}, {"-jp2", argFlag, &dumpJP2, 0, "write JPEG2000 images as JP2 files"}, + {"-jbig2", argFlag, &dumpJBIG2, 0, + "write JBIG2 images as JBIG2 files"}, {"-list", argFlag, &listImages, 0, "print list of images instead of saving"}, {"-opw", argString, ownerPassword, sizeof(ownerPassword), @@ -187,6 +190,7 @@ int main(int argc, char *argv[]) { imgOut->enableTiff(enableTiff); imgOut->enableJpeg(dumpJPEG); imgOut->enableJpeg2000(dumpJP2); + imgOut->enableJBig2(dumpJBIG2); doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, gTrue, gFalse, gFalse); } commit 2845ebabd00a2755549b8db436e78a3e0e0c0713 Author: Adrian Johnson <[email protected]> Date: Sun Aug 18 20:07:31 2013 +0930 pdfimages: add support for writing JPEG2000 files diff 
--git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index aef3f65..b92acaa 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -425,6 +425,10 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str, // dump JPEG file writeRawImage(str, "jpg"); + } else if (dumpJP2 && str->getKind() == strJPX && !inlineImg) { + // dump JPEG2000 file + writeRawImage(str, "jp2"); + } else if (outputPNG) { // output in PNG format diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h index a3d18f7..b674f05 100644 --- a/utils/ImageOutputDev.h +++ b/utils/ImageOutputDev.h @@ -79,6 +79,9 @@ public: // Use Jpeg format for Jpeg files void enableJpeg(GBool jpeg) { dumpJPEG = jpeg; } + // Use Jpeg2000 format for Jpeg2000 files + void enableJpeg2000(GBool jp2) { dumpJP2 = jp2; } + // Check if file was successfully created. virtual GBool isOk() { return ok; } @@ -154,6 +157,7 @@ private: char *fileName; // buffer for output file names GBool listImages; // list images instead of dumping GBool dumpJPEG; // set to dump native JPEG files + GBool dumpJP2; // set to dump native JPEG2000 files GBool outputPNG; // set to output in PNG format GBool outputTiff; // set to output in TIFF format GBool pageNames; // set to include page number in file names diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index 5b07c5c..79b3aad 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -11,7 +11,7 @@ pdfimages \- Portable Document Format (PDF) image extractor .B Pdfimages saves images from a Portable Document Format (PDF) file as Portable Pixmap (PPM), Portable Bitmap (PBM), Portable Network Graphics (PNG), -Tagged Image File Format (TIFF), or JPEG files. +Tagged Image File Format (TIFF), JPEG, or JPEG2000 files. .PP Pdfimages reads the PDF file .IR PDF-file , @@ -21,11 +21,12 @@ where .I nnn is the image number and .I xxx -is the image type (.ppm, .pbm, .png, .tif, or .jpg). +is the image type (.ppm, .pbm, .png, .tif, .jpg, or jp2). 
.PP The default output format is PBM (for monochrome images) or PPM for non-monochrome. The -\-png or \-tiff options change to default output to PNG or TIFF respectively. In addition the \-j option -will cause JPEG images in the PDF file to be written in JPEG format. +\-png or \-tiff options change to default output to PNG or TIFF respectively. In addition the \-j and +\-jp2 options will cause JPEG and JPEG2000, respectively, images in the PDF file to be written in their +native format. .SH OPTIONS .TP .BI \-f " number" @@ -43,6 +44,9 @@ Change the default output format to TIFF. .B \-j Write images in JPEG format as JPEG files instead of the default format. The JPEG file is identical to the JPEG data stored in the PDF. .TP +.B \-jp2 +Write images in JPEG2000 format as JP2 files instead of the default format. The JP2 file is identical to the JPEG2000 data stored in the PDF. +.TP .B \-list Instead of writing the images, list the images along with various information for each image. Do not specify an .IR image-root diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index fee7b75..0d2a420 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -53,6 +53,7 @@ static GBool listImages = gFalse; static GBool enablePNG = gFalse; static GBool enableTiff = gFalse; static GBool dumpJPEG = gFalse; +static GBool dumpJP2 = gFalse; static GBool pageNames = gFalse; static char ownerPassword[33] = "\001"; static char userPassword[33] = "\001"; @@ -75,6 +76,8 @@ static const ArgDesc argDesc[] = { #endif {"-j", argFlag, &dumpJPEG, 0, "write JPEG images as JPEG files"}, + {"-jp2", argFlag, &dumpJP2, 0, + "write JPEG2000 images as JP2 files"}, {"-list", argFlag, &listImages, 0, "print list of images instead of saving"}, {"-opw", argString, ownerPassword, sizeof(ownerPassword), @@ -183,6 +186,7 @@ int main(int argc, char *argv[]) { imgOut->enablePNG(enablePNG); imgOut->enableTiff(enableTiff); imgOut->enableJpeg(dumpJPEG); + imgOut->enableJpeg2000(dumpJP2); doc->displayPages(imgOut, 
firstPage, lastPage, 72, 72, 0, gTrue, gFalse, gFalse); } commit 2021c8ffcb36432049c4305e85ced2ae139086f3 Author: Adrian Johnson <[email protected]> Date: Sun Aug 18 17:29:00 2013 +0930 pdfimages: add support for png and tiff output diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index 110ba21..aef3f65 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -42,19 +42,23 @@ #include <math.h> #include "goo/gmem.h" #include "goo/NetPBMWriter.h" +#include "goo/PNGWriter.h" +#include "goo/TiffWriter.h" #include "Error.h" #include "GfxState.h" #include "Object.h" #include "Stream.h" #include "ImageOutputDev.h" -ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA) { +ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool listImagesA) { listImages = listImagesA; if (!listImages) { fileRoot = copyString(fileRootA); fileName = (char *)gmalloc(strlen(fileRoot) + 45); } - dumpJPEG = dumpJPEGA; + outputPNG = gFalse; + outputTiff = gFalse; + dumpJPEG = gFalse; pageNames = pageNamesA; imgNum = 0; pageNum = 0; @@ -374,6 +378,21 @@ void ImageOutputDev::writeImageFile(ImgWriter *writer, ImageFormat format, const writer->writeRow(&row); break; + case imgGray: + p = imgStr->getLine(); + rowp = row; + for (int x = 0; x < width; ++x) { + if (p) { + colorMap->getGray(p, &gray); + *rowp++ = colToByte(gray); + p += colorMap->getNumPixelComps(); + } else { + *rowp++ = 0; + } + } + writer->writeRow(&row); + break; + case imgMonochrome: int size = (width + 7)/8; for (int x = 0; x < size; x++) @@ -406,10 +425,52 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str, // dump JPEG file writeRawImage(str, "jpg"); + } else if (outputPNG) { + // output in PNG format + +#if ENABLE_LIBPNG + ImgWriter *writer; + + if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) { + writer = new PNGWriter(PNGWriter::MONOCHROME); + format = imgMonochrome; + } else if 
(colorMap->getColorSpace()->getMode() == csDeviceGray || + colorMap->getColorSpace()->getMode() == csCalGray) { + writer = new PNGWriter(PNGWriter::GRAY); + format = imgGray; + } else { + writer = new PNGWriter(PNGWriter::RGB); + format = imgRGB; + } + + writeImageFile(writer, format, "png", str, width, height, colorMap); +#endif + + } else if (outputTiff) { + // output in TIFF format + +#if ENABLE_LIBTIFF + ImgWriter *writer; + + if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) { + writer = new TiffWriter(TiffWriter::MONOCHROME); + format = imgMonochrome; + } else if (colorMap->getColorSpace()->getMode() == csDeviceGray || + colorMap->getColorSpace()->getMode() == csCalGray) { + writer = new TiffWriter(TiffWriter::GRAY); + format = imgGray; + } else { + writer = new TiffWriter(TiffWriter::RGB); + format = imgRGB; + } + + writeImageFile(writer, format, "tif", str, width, height, colorMap); +#endif + } else { + // output in PPM/PBM format ImgWriter *writer; - // dump PBM file if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) { writer = new NetPBMWriter(NetPBMWriter::MONOCHROME); format = imgMonochrome; diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h index d89836f..a3d18f7 100644 --- a/utils/ImageOutputDev.h +++ b/utils/ImageOutputDev.h @@ -55,19 +55,30 @@ public: }; enum ImageFormat { imgRGB, + imgGray, imgMonochrome }; // Create an OutputDev which will write images to files named // <fileRoot>-NNN.<type> or <fileRoot>-PPP-NNN.<type>, if // <pageNames> is set. Normally, all images are written as PBM - // (.pbm) or PPM (.ppm) files. If <dumpJPEG> is set, JPEG images + // (.pbm) or PPM (.ppm) files unless PNG or Tiff output is enabled + // (PNG is used if both are enabled). If Jpeg is enabled, JPEG images // are written as JPEG (.jpg) files. 
- ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA); + ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool listImagesA); // Destructor. virtual ~ImageOutputDev(); + // Use PNG format for output + void enablePNG(GBool png) { outputPNG = png; } + + // Use TIFF format for output + void enableTiff(GBool tiff) { outputTiff = tiff; } + + // Use Jpeg format for Jpeg files + void enableJpeg(GBool jpeg) { dumpJPEG = jpeg; } + // Check if file was successfully created. virtual GBool isOk() { return ok; } @@ -143,6 +154,8 @@ private: char *fileName; // buffer for output file names GBool listImages; // list images instead of dumping GBool dumpJPEG; // set to dump native JPEG files + GBool outputPNG; // set to output in PNG format + GBool outputTiff; // set to output in TIFF format GBool pageNames; // set to include page number in file names int pageNum; // current page number int imgNum; // current image number diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index e98bf29..5b07c5c 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -10,17 +10,22 @@ pdfimages \- Portable Document Format (PDF) image extractor .SH DESCRIPTION .B Pdfimages saves images from a Portable Document Format (PDF) file as Portable -Pixmap (PPM), Portable Bitmap (PBM), or JPEG files. +Pixmap (PPM), Portable Bitmap (PBM), Portable Network Graphics (PNG), +Tagged Image File Format (TIFF), or JPEG files. .PP Pdfimages reads the PDF file .IR PDF-file , -scans one or more pages, and writes one PPM, PBM, or JPEG file for each image, +scans one or more pages, and writes one file for each image, .IR image-root - nnn . xxx , where .I nnn is the image number and .I xxx -is the image type (.ppm, .pbm, .jpg). +is the image type (.ppm, .pbm, .png, .tif, or .jpg). +.PP +The default output format is PBM (for monochrome images) or PPM for non-monochrome. The +\-png or \-tiff options change to default output to PNG or TIFF respectively. 
In addition the \-j option +will cause JPEG images in the PDF file to be written in JPEG format. .SH OPTIONS .TP .BI \-f " number" @@ -29,11 +34,14 @@ Specifies the first page to scan. .BI \-l " number" Specifies the last page to scan. .TP +.B \-png +Change the default output format to PNG. +.TP +.B \-tiff +Change the default output format to TIFF. +.TP .B \-j -Normally, all images are written as PBM (for monochrome images) or PPM -(for non-monochrome images) files. With this option, images in DCT -format are saved as JPEG files. All non-DCT images are saved in -PBM/PPM format as usual. +Write images in JPEG format as JPEG files instead of the default format. The JPEG file is identical to the JPEG data stored in the PDF. .TP .B \-list Instead of writing the images, list the images along with various information for each image. Do not specify an diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index 7b6cad0..fee7b75 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -50,6 +50,8 @@ static int firstPage = 1; static int lastPage = 0; static GBool listImages = gFalse; +static GBool enablePNG = gFalse; +static GBool enableTiff = gFalse; static GBool dumpJPEG = gFalse; static GBool pageNames = gFalse; static char ownerPassword[33] = "\001"; @@ -63,6 +65,14 @@ static const ArgDesc argDesc[] = { "first page to convert"}, {"-l", argInt, &lastPage, 0, "last page to convert"}, +#if ENABLE_LIBPNG + {"-png", argFlag, &enablePNG, 0, + "change the default output format to PNG"}, +#endif +#if ENABLE_LIBTIFF + {"-tiff", argFlag, &enableTiff, 0, + "change the default output format to TIFF"}, +#endif {"-j", argFlag, &dumpJPEG, 0, "write JPEG images as JPEG files"}, {"-list", argFlag, &listImages, 0, @@ -168,10 +178,13 @@ int main(int argc, char *argv[]) { lastPage = doc->getNumPages(); // write image files - imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages); + imgOut = new ImageOutputDev(imgRoot, pageNames, listImages); if (imgOut->isOk()) { - 
doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, - gTrue, gFalse, gFalse); + imgOut->enablePNG(enablePNG); + imgOut->enableTiff(enableTiff); + imgOut->enableJpeg(dumpJPEG); + doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, + gTrue, gFalse, gFalse); } delete imgOut; commit 8f466775c77b09a7114c688004317e6db05bcd3f Author: Adrian Johnson <[email protected]> Date: Sun Aug 18 16:08:02 2013 +0930 Change PNGWriter monochrome format to be 8 pixels/byte to be consistent with TiffWriter and NetPBMWriter diff --git a/goo/PNGWriter.cc b/goo/PNGWriter.cc index b775600..4370f28 100644 --- a/goo/PNGWriter.cc +++ b/goo/PNGWriter.cc @@ -147,10 +147,6 @@ bool PNGWriter::init(FILE *f, int width, int height, int hDPI, int vDPI) return false; } - // pack 1 pixel/byte rows into 8 pixels/byte - if (priv->format == MONOCHROME) - png_set_packing(priv->png_ptr); - return true; } diff --git a/goo/PNGWriter.h b/goo/PNGWriter.h index ac8f95a..64b8833 100644 --- a/goo/PNGWriter.h +++ b/goo/PNGWriter.h @@ -31,7 +31,7 @@ public: /* RGB - 3 bytes/pixel * RGBA - 4 bytes/pixel * GRAY - 1 byte/pixel - * MONOCHROME - 1 byte/pixel. PNGWriter will bitpack to 8 pixels/byte + * MONOCHROME - 8 pixels/byte */ enum Format { RGB, RGBA, GRAY, MONOCHROME }; diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc index 7926674..3926178 100644 --- a/utils/HtmlOutputDev.cc +++ b/utils/HtmlOutputDev.cc @@ -1413,32 +1413,39 @@ void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int he delete imgStr; } else { // isMask == true - ImageStream *imgStr = new ImageStream(str, width, 1, 1); - imgStr->reset(); + int size = (width + 7)/8; + + // PDF masks use 0 = draw current color, 1 = leave unchanged. + // We invert this to provide the standard interpretation of alpha + // (0 = transparent, 1 = opaque). If the colorMap already inverts + // the mask we leave the data unchanged. 
+ int invert_bits = 0xff; + if (colorMap) { + GfxGray gray; + Guchar zero = 0; + colorMap->getGray(&zero, &gray); + if (colToByte(gray) == 0) + invert_bits = 0x00; + } - Guchar *png_row = (Guchar *)gmalloc( width ); + str->reset(); + Guchar *png_row = (Guchar *)gmalloc(size); for (int ri = 0; ri < height; ++ri) { - // read the row of the mask - Guchar *bit_row = imgStr->getLine(); - - // invert for PNG - for(int i = 0; i < width; i++) - png_row[i] = bit_row[i] ? 0xff : 0x00; + for(int i = 0; i < size; i++) + png_row[i] = str->getChar() ^ invert_bits; if (!writer->writeRow( &png_row )) { error(errIO, -1, "Failed to write into PNG '%s'", fName->getCString()); delete writer; fclose(f1); - delete imgStr; gfree(png_row); return; } } - imgStr->close(); - delete imgStr; + str->close(); gfree(png_row); } diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index 58f3cd8..110ba21 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -315,7 +315,7 @@ void ImageOutputDev::writeImageFile(ImgWriter *writer, ImageFormat format, const GfxRGB rgb; GfxGray gray; Guchar zero = 0; - int invert_bits = 0xff; + int invert_bits; setFilename(ext); ++imgNum; @@ -341,8 +341,11 @@ void ImageOutputDev::writeImageFile(ImgWriter *writer, ImageFormat format, const row = (unsigned char *) gmallocn(width, sizeof(unsigned int)); - // if 0 comes out as 0 in the color map, the we _flip_ stream bits - // otherwise we pass through stream bits unmolested + // PDF masks use 0 = draw current color, 1 = leave unchanged. + // We invert this to provide the standard interpretation of alpha + // (0 = transparent, 1 = opaque). If the colorMap already inverts + // the mask we leave the data unchanged. 
+ invert_bits = 0xff; if (colorMap) { colorMap->getGray(&zero, &gray); if (colToByte(gray) == 0) diff --git a/utils/pdftocairo.cc b/utils/pdftocairo.cc index 192d295..841c388 100644 --- a/utils/pdftocairo.cc +++ b/utils/pdftocairo.cc @@ -370,7 +370,7 @@ void writePageImage(GooString *filename) int b = (*pixel & 0x000000ff) >> 0; // an arbitrary integer approximation of .3*r + .59*g + .11*b int y = (r*19661+g*38666+b*7209 + 32829)>>16; - if (tiff && mono) { + if (mono) { if (bit == 7) *rowp = 0; if (y > 127) commit e53aec2c61ba42cf0635dc05f8e27e3503c1eaac Author: Adrian Johnson <[email protected]> Date: Sun Aug 18 15:50:39 2013 +0930 Refactor ImageOutputDev to facilitate adding more output formats - Move PPM/PBM code into a NetPBMWriter class so PNGWriter and TiffWriter support can be added. - Create generic writeRawImage function for writing jpeg files so support for jpeg2000/jbig2 can be added. diff --git a/CMakeLists.txt b/CMakeLists.txt index 0da8c6d..1e03c5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -264,6 +264,7 @@ set(poppler_SRCS goo/GooString.cc goo/gmem.cc goo/FixedPoint.cc + goo/NetPBMWriter.cc goo/PNGWriter.cc goo/TiffWriter.cc goo/JpegWriter.cc diff --git a/goo/Makefile.am b/goo/Makefile.am index 0764e79..a48b20e 100644 --- a/goo/Makefile.am +++ b/goo/Makefile.am @@ -13,6 +13,7 @@ poppler_goo_include_HEADERS = \ gmem.h \ gfile.h \ FixedPoint.h \ + NetPBMWriter.h \ PNGWriter.h \ JpegWriter.h \ TiffWriter.h \ @@ -55,6 +56,7 @@ libgoo_la_SOURCES = \ GooString.cc \ gmem.cc \ FixedPoint.cc \ + NetPBMWriter.cc \ PNGWriter.cc \ JpegWriter.cc \ TiffWriter.cc \ diff --git a/goo/NetPBMWriter.cc b/goo/NetPBMWriter.cc new file mode 100644 index 0000000..fca00b2 --- /dev/null +++ b/goo/NetPBMWriter.cc @@ -0,0 +1,84 @@ +//======================================================================== +// +// NetPBMWriter.cc +// +// Copyright 1998-2003 Glyph & Cog, LLC +// +//======================================================================== +//
+//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2005, 2007, 2011 Albert Astals Cid <[email protected]> +// Copyright (C) 2006 Rainer Keller <[email protected]> +// Copyright (C) 2008 Timothy Lee <[email protected]> +// Copyright (C) 2008 Vasile Gaburici <[email protected]> +// Copyright (C) 2009 Carlos Garcia Campos <[email protected]> +// Copyright (C) 2009 William Bader <[email protected]> +// Copyright (C) 2010 Jakob Voss <[email protected]> +// Copyright (C) 2012, 2013 Adrian Johnson <[email protected]> +// Copyright (C) 2013 Thomas Fischer <[email protected]> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "poppler-config.h" + +#include "NetPBMWriter.h" + +// Writer for the NetPBM formats (PBM and PPM) +// This format is documented at: +// http://netpbm.sourceforge.net/doc/pbm.html +// http://netpbm.sourceforge.net/doc/ppm.html + +NetPBMWriter::NetPBMWriter(Format formatA) : format(formatA) +{ +} + +bool NetPBMWriter::init(FILE *f, int widthA, int heightA, int hDPI, int vDPI) +{ + file = f; + width = widthA; + if (format == MONOCHROME) { + fprintf(file, "P4\n"); + fprintf(file, "%d %d\n", widthA, heightA); + } else { + fprintf(file, "P6\n"); + fprintf(file, "%d %d\n", widthA, heightA); + fprintf(file, "255\n"); + } + return true; +} + +bool NetPBMWriter::writePointers(unsigned char **rowPointers, int rowCount) +{ + for (int i = 0; i < rowCount; i++) + writeRow(&rowPointers[i]); + return true; +} + +bool NetPBMWriter::writeRow(unsigned char **row) +{ + if (format == MONOCHROME) { + // PBM uses 0 = white, 1 = black so we need to 
invert the colors + int size = (width + 7)/8; + for (int i = 0; i < size; i++) + fputc((*row)[i] ^ 0xff, file); + } else { + fwrite(*row, 1, width*3, file); + } + return true; +} + + +bool NetPBMWriter::close() +{ + return true; +} + diff --git a/goo/NetPBMWriter.h b/goo/NetPBMWriter.h new file mode 100644 index 0000000..21a19ee --- /dev/null +++ b/goo/NetPBMWriter.h @@ -0,0 +1,52 @@ +//======================================================================== +// +// NetPBMWriter.h +// +// This file is licensed under the GPLv2 or later +// +// Copyright (C) 2009 Stefan Thomas <[email protected]> +// Copyright (C) 2009, 2011 Albert Astals Cid <[email protected]> +// Copyright (C) 2010, 2013 Adrian Johnson <[email protected]> +// Copyright (C) 2010 Brian Cameron <[email protected]> +// Copyright (C) 2011 Thomas Freitag <[email protected]> +// +//======================================================================== + +#ifndef NETPBMWRITER_H +#define NETPBMWRITER_H + +#include "poppler-config.h" + +#include "ImgWriter.h" + +// Writer for the NetPBM formats (PBM and PPM) +// This format is documented at: +// http://netpbm.sourceforge.net/doc/pbm.html +// http://netpbm.sourceforge.net/doc/ppm.html + +class NetPBMWriter : public ImgWriter +{ +public: + + /* RGB - 3 bytes/pixel + * MONOCHROME - 8 pixels/byte + */ + enum Format { RGB, MONOCHROME }; + + NetPBMWriter(Format formatA = RGB); + ~NetPBMWriter() {}; + + bool init(FILE *f, int width, int height, int hDPI, int vDPI); + + bool writePointers(unsigned char **rowPointers, int rowCount); + bool writeRow(unsigned char **row); + + bool close(); + +private: + FILE *file; + Format format; + int width; +}; + +#endif diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index 3b9427b..58f3cd8 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -41,6 +41,7 @@ #include <ctype.h> #include <math.h> #include "goo/gmem.h" +#include "goo/NetPBMWriter.h" #include "Error.h" #include "GfxState.h" #include 
"Object.h" @@ -280,177 +281,145 @@ void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str, ++imgNum; } -void ImageOutputDev::writeMask(GfxState *state, Object *ref, Stream *str, - int width, int height, GBool invert, - GBool interpolate, GBool inlineImg) { +void ImageOutputDev::writeRawImage(Stream *str, const char *ext) { FILE *f; int c; - int size, i; - // dump JPEG file - if (dumpJPEG && str->getKind() == strDCT && !inlineImg) { + // open the image file + setFilename(ext); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } - // open the image file - setFilename("jpg"); - ++imgNum; - if (!(f = fopen(fileName, "wb"))) { - error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); - return; - } + // initialize stream + str = str->getNextStream(); + str->reset(); - // initialize stream - str = str->getNextStream(); - str->reset(); + // copy the stream + while ((c = str->getChar()) != EOF) + fputc(c, f); - // copy the stream - while ((c = str->getChar()) != EOF) - fputc(c, f); + str->close(); + fclose(f); +} - str->close(); - fclose(f); +void ImageOutputDev::writeImageFile(ImgWriter *writer, ImageFormat format, const char *ext, + Stream *str, int width, int height, GfxImageColorMap *colorMap) { + FILE *f; + ImageStream *imgStr; + unsigned char *row; + unsigned char *rowp; + Guchar *p; + GfxRGB rgb; + GfxGray gray; + Guchar zero = 0; + int invert_bits = 0xff; - // dump PBM file - } else { + setFilename(ext); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } - // open the image file and write the PBM header - setFilename("pbm"); - ++imgNum; - if (!(f = fopen(fileName, "wb"))) { - error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); - return; - } - fprintf(f, "P4\n"); - fprintf(f, "%d %d\n", width, height); + if (!writer->init(f, width, height, 72, 72)) { + error(errIO, -1, "Error 
writing '{0:s}'", fileName); + return; + } + if (format != imgMonochrome) { + // initialize stream + imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(), + colorMap->getBits()); + imgStr->reset(); + } else { // initialize stream str->reset(); + } + + row = (unsigned char *) gmallocn(width, sizeof(unsigned int)); + + // if 0 comes out as 0 in the color map, the we _flip_ stream bits + // otherwise we pass through stream bits unmolested + if (colorMap) { + colorMap->getGray(&zero, &gray); + if (colToByte(gray) == 0) + invert_bits = 0x00; + } - // copy the stream - size = height * ((width + 7) / 8); - for (i = 0; i < size; ++i) { - fputc(str->getChar(), f); + // for each line... + for (int y = 0; y < height; y++) { + switch (format) { + case imgRGB: + p = imgStr->getLine(); + rowp = row; + for (int x = 0; x < width; ++x) { + if (p) { + colorMap->getRGB(p, &rgb); + *rowp++ = colToByte(rgb.r); + *rowp++ = colToByte(rgb.g); + *rowp++ = colToByte(rgb.b); + p += colorMap->getNumPixelComps(); + } else { + *rowp++ = 0; + *rowp++ = 0; + *rowp++ = 0; + } + } + writer->writeRow(&row); + break; + + case imgMonochrome: + int size = (width + 7)/8; + for (int x = 0; x < size; x++) + row[x] = str->getChar() ^ invert_bits; + writer->writeRow(&row); + break; } + } - str->close(); - fclose(f); + gfree(row); + if (format != imgMonochrome) { + imgStr->close(); + delete imgStr; } + str->close(); + writer->close(); + fclose(f); } void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str, int width, int height, - GfxImageColorMap *colorMap, - GBool interpolate, int *maskColors, GBool inlineImg) { - FILE *f; - ImageStream *imgStr; - Guchar *p; - Guchar zero = 0; - GfxGray gray; - GfxRGB rgb; - int x, y; - int c; - int size, i; - int pbm_mask = 0xff; + GfxImageColorMap *colorMap, GBool inlineImg) { + ImageFormat format; - // dump JPEG file if (dumpJPEG && str->getKind() == strDCT && (colorMap->getNumPixelComps() == 1 || colorMap->getNumPixelComps() == 3) && 
!inlineImg) { - // open the image file - setFilename("jpg"); - ++imgNum; - if (!(f = fopen(fileName, "wb"))) { - error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); - return; - } - - // initialize stream - str = str->getNextStream(); - str->reset(); - - // copy the stream - while ((c = str->getChar()) != EOF) - fputc(c, f); - - str->close(); - fclose(f); - - // dump PBM file - } else if (colorMap->getNumPixelComps() == 1 && - colorMap->getBits() == 1) { - - // open the image file and write the PBM header - setFilename("pbm"); - ++imgNum; - if (!(f = fopen(fileName, "wb"))) { - error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); - return; - } - fprintf(f, "P4\n"); - fprintf(f, "%d %d\n", width, height); - - // initialize stream - str->reset(); - - // if 0 comes out as 0 in the color map, the we _flip_ stream bits - // otherwise we pass through stream bits unmolested - colorMap->getGray(&zero, &gray); - if(colToByte(gray)) - pbm_mask = 0; - - // copy the stream - size = height * ((width + 7) / 8); - for (i = 0; i < size; ++i) { - fputc(str->getChar() ^ pbm_mask, f); - } - - str->close(); - fclose(f); + // dump JPEG file + writeRawImage(str, "jpg"); - // dump PPM file } else { + ImgWriter *writer; - // open the image file and write the PPM header - setFilename("ppm"); - ++imgNum; - if (!(f = fopen(fileName, "wb"))) { - error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); - return; + // dump PBM file + if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) { + writer = new NetPBMWriter(NetPBMWriter::MONOCHROME); + format = imgMonochrome; + } else { + writer = new NetPBMWriter(NetPBMWriter::RGB); + format = imgRGB; } - fprintf(f, "P6\n"); - fprintf(f, "%d %d\n", width, height); - fprintf(f, "255\n"); - - // initialize stream - imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(), - colorMap->getBits()); - imgStr->reset(); - // for each line... 
- for (y = 0; y < height; ++y) { - - // write the line - if ((p = imgStr->getLine())) { - for (x = 0; x < width; ++x) { - colorMap->getRGB(p, &rgb); - fputc(colToByte(rgb.r), f); - fputc(colToByte(rgb.g), f); - fputc(colToByte(rgb.b), f); - p += colorMap->getNumPixelComps(); - } - } else { - for (x = 0; x < width; ++x) { - fputc(0, f); - fputc(0, f); - fputc(0, f); - } - } - } - imgStr->close(); - delete imgStr; + writeImageFile(writer, format, + format == imgRGB ? "ppm" : "pbm", + str, width, height, colorMap); - fclose(f); + delete writer; } } @@ -469,7 +438,7 @@ void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, if (listImages) listImage(state, ref, str, width, height, NULL, interpolate, inlineImg, imgStencil); else - writeMask(state, ref, str, width, height, invert, interpolate, inlineImg); + writeImage(state, ref, str, width, height, NULL, inlineImg); } void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, @@ -479,7 +448,7 @@ void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, if (listImages) listImage(state, ref, str, width, height, colorMap, interpolate, inlineImg, imgImage); else - writeImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg); + writeImage(state, ref, str, width, height, colorMap, inlineImg); } void ImageOutputDev::drawMaskedImage( @@ -490,9 +459,8 @@ void ImageOutputDev::drawMaskedImage( listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage); listImage(state, ref, str, maskWidth, maskHeight, NULL, maskInterpolate, gFalse, imgMask); } else { - drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse); - drawImageMask(state, ref, maskStr, maskWidth, maskHeight, maskInvert, - maskInterpolate, gFalse); + writeImage(state, ref, str, width, height, colorMap, gFalse); + writeImage(state, ref, maskStr, maskWidth, maskHeight, NULL, gFalse); } } @@ -505,8 +473,7 @@ void ImageOutputDev::drawSoftMaskedImage( 
listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage); listImage(state, ref, maskStr, maskWidth, maskHeight, maskColorMap, maskInterpolate, gFalse, imgSmask); } else { - drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse); - drawImage(state, ref, maskStr, maskWidth, maskHeight, - maskColorMap, maskInterpolate, NULL, gFalse); + writeImage(state, ref, str, width, height, colorMap, gFalse); + writeImage(state, ref, maskStr, maskWidth, maskHeight, maskColorMap, gFalse); } } diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h index 13911ed..d89836f 100644 --- a/utils/ImageOutputDev.h +++ b/utils/ImageOutputDev.h @@ -36,6 +36,7 @@ #include <stdio.h> #include "goo/gtypes.h" +#include "goo/ImgWriter.h" #include "OutputDev.h" class GfxState; @@ -52,6 +53,10 @@ public: imgMask, imgSmask }; + enum ImageFormat { + imgRGB, + imgMonochrome + }; // Create an OutputDev which will write images to files named // <fileRoot>-NNN.<type> or <fileRoot>-PPP-NNN.<type>, if @@ -128,13 +133,11 @@ private: GfxImageColorMap *colorMap, GBool interpolate, GBool inlineImg, ImageType imageType); - void writeMask(GfxState *state, Object *ref, Stream *str, - int width, int height, GBool invert, - GBool interpolate, GBool inlineImg); void writeImage(GfxState *state, Object *ref, Stream *str, - int width, int height, GfxImageColorMap *colorMap, - GBool interpolate, int *maskColors, GBool inlineImg); - + int width, int height, GfxImageColorMap *colorMap, GBool inlineImg); + void writeRawImage(Stream *str, const char *ext); + void writeImageFile(ImgWriter *writer, ImageFormat format, const char *ext, + Stream *str, int width, int height, GfxImageColorMap *colorMap); char *fileRoot; // root of output file names char *fileName; // buffer for output file names commit 0ca0fcc9f536a57365048914cd8a8cc3eb5ed4fd Author: Adrian Johnson <[email protected]> Date: Sat Aug 17 15:24:43 2013 +0930 pdfimages: fix bug in -list output Images of type 
/ImageMask should have type 'stencil'. diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index 1d1064b..3b9427b 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -467,7 +467,7 @@ void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, GBool invert, GBool interpolate, GBool inlineImg) { if (listImages) - listImage(state, ref, str, width, height, NULL, interpolate, inlineImg, imgMask); + listImage(state, ref, str, width, height, NULL, interpolate, inlineImg, imgStencil); else writeMask(state, ref, str, width, height, invert, interpolate, inlineImg); } commit f8ee5a931c795013d17f73f083b6e6f9a683d061 Author: Adrian Johnson <[email protected]> Date: Sat Aug 17 15:17:11 2013 +0930 pdfimages: print size, ratio, and ppi diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index 8b18d2b..1d1064b 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -20,7 +20,7 @@ // Copyright (C) 2009 Carlos Garcia Campos <[email protected]> // Copyright (C) 2009 William Bader <[email protected]> // Copyright (C) 2010 Jakob Voss <[email protected]> -// Copyright (C) 2012 Adrian Johnson <[email protected]> +// Copyright (C) 2012, 2013 Adrian Johnson <[email protected]> // Copyright (C) 2013 Thomas Fischer <[email protected]> // // To see a description of the changes please see the Changelog file that @@ -39,6 +39,7 @@ #include <stdlib.h> #include <stddef.h> #include <ctype.h> +#include <math.h> #include "goo/gmem.h" #include "Error.h" #include "GfxState.h" @@ -58,8 +59,8 @@ ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEG pageNum = 0; ok = gTrue; if (listImages) { - printf("page num type width height color comp bpc enc interp object ID\n"); - printf("---------------------------------------------------------------------\n"); + printf("page num type width height color comp bpc enc interp object ID x-ppi y-ppi size ratio\n"); + 
printf("--------------------------------------------------------------------------------------------\n"); } } @@ -79,6 +80,34 @@ void ImageOutputDev::setFilename(const char *fileExt) { } } + +// Print a floating point number between 0 - 9999 using 4 characters +// eg '1.23', '12.3', ' 123', '1234' +// +// We need to be careful to handle the cases where rounding adds an +// extra digit before the decimal. eg printf("%4.2f", 9.99999) +// outputs "10.00" instead of "9.99". +static void printNumber(double d) +{ + char buf[10]; + + if (d < 10.0) { + sprintf(buf, "%4.2f", d); + buf[4] = 0; + printf("%s", buf); + } else if (d < 100.0) { + sprintf(buf, "%4.1f", d); + if (!isdigit(buf[3])) { + buf[3] = 0; + printf(" %s", buf); + } else { + printf("%s", buf); + } + } else { + printf("%4.0f", d); + } +} + void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, @@ -179,18 +208,75 @@ void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str, printf("%-3s ", interpolate ? 
"yes" : "no"); if (inlineImg) { - printf("[inline]\n"); + printf("[inline] "); } else if (ref->isRef()) { const Ref imageRef = ref->getRef(); if (imageRef.gen >= 100000) { - printf("[none]\n"); + printf("[none] "); } else { - printf(" %6d %2d\n", imageRef.num, imageRef.gen); + printf(" %6d %2d ", imageRef.num, imageRef.gen); } } else { - printf("[none]\n"); + printf("[none] "); + } + + double *mat = state->getCTM(); + double width2 = mat[0] + mat[2]; + double height2 = mat[1] + mat[3]; + double xppi = fabs(width*72.0/width2) + 0.5; + double yppi = fabs(height*72.0/height2) + 0.5; + if (xppi < 1.0) + printf("%5.3f ", xppi); + else + printf("%5.0f ", xppi); + if (yppi < 1.0) + printf("%5.3f ", yppi); + else + printf("%5.0f ", yppi); + + Goffset embedSize = -1; + if (!inlineImg) + embedSize = str->getBaseStream()->getLength(); + + long long imageSize = 0; + if (colorMap && colorMap->isOk()) + imageSize = ((long long)width * height * colorMap->getNumPixelComps() * colorMap->getBits())/8; + else + imageSize = (long long)width*height/8; // mask + + double ratio = -1.0; + if (imageSize > 0) + ratio = 100.0*embedSize/imageSize; + + if (embedSize < 0) { + printf(" - "); + } else if (embedSize <= 9999) { + printf("%4lldB", embedSize); + } else { + double d = embedSize/1024.0; + if (d <= 9999.0) { + printNumber(d); + putchar('K'); + } else { + d /= 1024.0; + if (d <= 9999.0) { + printNumber(d); + putchar('M'); + } else { + d /= 1024.0; + printNumber(d); + putchar('G'); + } + } } + if (ratio > 9.9) + printf(" %3.0f%%\n", ratio); + else if (ratio >= 0.0) + printf(" %3.1f%%\n", ratio); + else + printf(" - \n"); + ++imgNum; } diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index 955d8b3..e98bf29 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -134,6 +134,18 @@ ccitt - CCITT Group 3 or Group 4 Fax .TP .B object ID the font dictionary object ID (number and generation) +.TP +.B x\-ppi +The horizontal resolution of the image (in pixels per inch) when rendered on the pdf 
page. +.TP +.B y\-ppi +The vertical resolution of the image (in pixels per inch) when rendered on the pdf page. +.TP +.B size +The size of the embedded image in the pdf file. The following suffixes are used: 'B' bytes, 'K' kilobytes, 'M' megabytes, and 'G' gigabytes. +.TP +.B ratio +The compression ratio of the embedded image. .RE .TP .BI \-opw " password" diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index 82c301c..7b6cad0 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -18,7 +18,7 @@ // Copyright (C) 2007-2008, 2010 Albert Astals Cid <[email protected]> // Copyright (C) 2010 Hib Eris <[email protected]> // Copyright (C) 2010 Jakob Voss <[email protected]> -// Copyright (C) 2012 Adrian Johnson <[email protected]> +// Copyright (C) 2012, 2013 Adrian Johnson <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git _______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
