Re: [poppler] Creating PDF with poppler ?

Thomas Freitag Thu, 31 Mar 2011 04:54:49 -0700

On 25.03.2011 21:42, Albert Astals Cid wrote:

A Dilluns, 20 de desembre de 2010, Thomas Freitag va escriure:

Hi all!

Hi

Hi again!

Sorry for this late, but therefore now more correct, answer. As you probably know, I was very busy implementing overprint, and after that I took some "free poppler" days!

As I promised a long time ago, here is a patch for the poppler writing
routines and some new helper functions, together with two small sample
programs using them.

I'm resending the patch because what you sent was unnecessarily big and scary
(which diff program do you use?).

I have now taken your resent patch as a base, reviewed it completely (hopefully nothing is missing) and added the small utility programs pdfextract and pdfmerge to the patch, after learning a little bit more about git. This patch was created under Ubuntu, so hopefully it is no bigger than necessary and not so scary. I also renamed the methods according to the poppler naming conventions, and the utility programs now give up with an error message if the PDF, or one of the PDFs, is encrypted, instead of creating unreadable PDF(s).

Basically there are a few stylistic things I don't like (please don't use
this->)

I removed them now.

Also do you really need to have things like ExtractPage in PDFDoc? Not sure
they make sense to me there

It would be a bigger task to move it to pdfextract (along with all the mark methods), but it is probably doable. On the other hand, there are already routines like saveAs in PDFDoc, so I just renamed it to savePageAs to clarify it. But if you still insist on removing them, I'll try it.

Hopefully we are not doing the work twice. When I read your posting once again, I thought that I should do something more on it.


Thomas

Albert


_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

diff --git a/utils/pdfextract.cc b/utils/pdfextract.cc
new file mode 100644
index 0000000..c8c4749
--- /dev/null
+++ b/utils/pdfextract.cc
@@ -0,0 +1,111 @@
+//========================================================================
+//
+// pdfextract.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <[email protected]>
+//
+//========================================================================
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "PDFDoc.h"
+#include "ErrorCodes.h"
+
+static int firstPage = 0;  // target of -f; 0 is treated by extractPages() as "not given" (becomes 1)
+static int lastPage = 0;  // target of -l; 0 is treated by extractPages() as "not given" (becomes the page count)
+static GBool printVersion = gFalse;  // target of -v
+static GBool printHelp = gFalse;  // target of -h, -help, --help and -?
+
+static const ArgDesc argDesc[] = {  // option table passed to parseArgs() and printUsage() in main()
+  {"-f", argInt, &firstPage, 0,
+   "first page to extract"},
+  {"-l", argInt, &lastPage, 0,
+   "last page to extract"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+// Save pages firstPage..lastPage of srcFileName as separate files; each name is
+// built from the printf-style pattern destFileName, formatted with the page number.
+// Returns true when every page was written, false on the first failure.
+bool extractPages (const char *srcFileName, const char *destFileName) {
+  char pathName[1024];
+  GooString *gfileName = new GooString (srcFileName);
+  PDFDoc *doc = new PDFDoc (gfileName, NULL, NULL, NULL);  // ~PDFDoc deletes its file name, so only doc is deleted below
+
+  if (!doc->isOk()) {
+    error(-1, "Could not extract page(s) from damaged file ('%s')", srcFileName);
+    delete doc;
+    return false;
+  }
+  if (doc->isEncrypted()) {
+    error(-1, "Could not extract page(s) from encrypted file ('%s')", srcFileName);
+    delete doc;
+    return false;
+  }
+
+  if (lastPage == 0)  // no -l given: run to the last page
+    lastPage = doc->getNumPages();
+  if (firstPage == 0)  // no -f given: start at the first page
+    firstPage = 1;
+  for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
+    snprintf (pathName, sizeof(pathName), destFileName, pageNo);  // bounded: a long pattern is truncated, not overflowed
+    GooString *gpageName = new GooString (pathName);
+    int errCode = doc->savePageAs(gpageName, pageNo);
+    delete gpageName;
+    if ( errCode != errNone) {
+      delete doc;
+      return false;
+    }
+  }
+  delete doc;
+  return true;
+}
+
+int
+main (int argc, char *argv[])
+{
+  GBool ok;
+  int exitCode;
+
+  exitCode = 99;  // kept for bad usage and for a failed extraction
+
+  // parse args
+  ok = parseArgs (argDesc, &argc, argv);
+  if (!ok || argc != 3 || printVersion || printHelp)
+    {
+      fprintf (stderr, "pdfextract version %s\n", PACKAGE_VERSION);
+      fprintf (stderr, "%s\n", popplerCopyright);
+      fprintf (stderr, "%s\n", xpdfCopyright);
+      if (!printVersion)
+	{
+	  printUsage ("pdfextract", "<PDF-sourcefile> <PDF-pattern-destfile>",
+		      argDesc);
+	}
+      if (printVersion || printHelp)
+	exitCode = 0;
+      goto err0;
+    }
+  if (extractPages (argv[1], argv[2]))  // success must be reflected in the exit status
+    exitCode = 0;
+
+err0:
+
+  return exitCode;
+}
diff --git a/utils/pdfmerge.cc b/utils/pdfmerge.cc
new file mode 100644
index 0000000..e5b5a0c
--- /dev/null
+++ b/utils/pdfmerge.cc
@@ -0,0 +1,167 @@
+//========================================================================
+//
+// pdfmerge.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <[email protected]>
+//
+//========================================================================
+#include <PDFDoc.h>
+#include "parseargs.h"
+#include "config.h"
+#include <poppler-config.h>
+#include <vector>
+
+static GBool printVersion = gFalse;  // target of the -v entry below
+static GBool printHelp = gFalse;  // target of the -h, -help, --help and -? entries below
+
+static const ArgDesc argDesc[] = {  // option table; main() passes it to printUsage()
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+///////////////////////////////////////////////////////////////////////////
+int main (int argc, char *argv[])
+///////////////////////////////////////////////////////////////////////////
+// Merge PDF files given by arguments 1 to argc-2 and write the result
+// to the file specified by argument argc-1.
+///////////////////////////////////////////////////////////////////////////
+{
+  int objectsCount = 0;
+  Guint numOffset = 0;
+  std::vector<Object> pages;
+  std::vector<Guint> offsets;
+  XRef *yRef;
+  FILE *f;
+  OutStream *outStr;
+  int i, j, rootNum;
+  std::vector<PDFDoc *>docs;
+  double version = 0;
+  char *fileName;
+  int exitCode = 99;
+
+  // the option flags must be parsed first; argc/argv are inspected only afterwards
+  GBool ok = parseArgs (argDesc, &argc, argv);
+  if (!ok || argc <= 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdfmerge version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfmerge", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>",
+	argDesc);
+    }
+    if (printVersion || printHelp)
+      exitCode = 0;
+    return exitCode;
+  }
+  exitCode = 0;
+  fileName = argv[argc - 1];  // destination file is the last argument
+  for (i = 1; i < argc - 1; i++) {
+    GooString *gfileName = new GooString(argv[i]);
+    PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL);
+    if (doc->isOk() && !doc->isEncrypted()) {
+      docs.push_back(doc);
+      if ((doc->getPDFMajorVersion() + doc->getPDFMinorVersion() / 10.0) > version)
+        version = doc->getPDFMajorVersion() + doc->getPDFMinorVersion() / 10.0;
+    } else if (doc->isOk()) {
+      error(-1, "Could not merge encrypted files ('%s')", argv[i]);
+      return -1;
+    } else {
+      error(-1, "Could not merge damaged documents ('%s')", argv[i]);
+      return -1;
+    }
+  }
+
+  if (!(f = fopen(fileName, "wb"))) {
+    error(-1, "Could not open file '%s'", fileName);
+    return -1;
+  }
+  outStr = new FileOutStream(f, 0);
+
+  yRef = new XRef();
+  yRef->add(0, 65535, 0, gFalse);
+  PDFDoc::writeHeader(outStr, version);
+
+  for (i = 0; i < (int) docs.size(); i++) {
+    for (j = 1; j <= docs[i]->getNumPages(); j++) {
+      PDFRectangle *cropBox = NULL;
+      if (docs[i]->getCatalog()->getPage(j)->isCropped())
+        cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox();
+      docs[i]->replacePageDict(j,
+	    docs[i]->getCatalog()->getPage(j)->getRotate(),
+	    docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL);
+      Ref *refPage = docs[i]->getCatalog()->getPageRef(j);
+      Object page;
+      docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page);
+      pages.push_back(page);
+      offsets.push_back(numOffset);
+      Dict *pageDict = page.getDict();
+      docs[i]->markPageObjects(pageDict, yRef, numOffset);
+    }
+    objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset);
+    numOffset = yRef->getNumObjects() + 1;  // object numbers of the next document are shifted past this one
+  }
+
+  rootNum = yRef->getNumObjects() + 1;
+  yRef->add(rootNum, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum);
+  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
+  outStr->printf(">>\nendobj\n");
+  objectsCount++;
+
+  yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 1);
+  outStr->printf("<< /Type /Pages /Kids [");
+  for (j = 0; j < (int) pages.size(); j++)
+    outStr->printf(" %d 0 R", rootNum + j + 2);
+  outStr->printf(" ] /Count %d >>\nendobj\n", (int) pages.size());  // %d needs an int, size() yields size_t
+  objectsCount++;
+
+  for (i = 0; i < (int) pages.size(); i++) {
+    yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue);
+    outStr->printf("%d 0 obj\n", rootNum + i + 2);
+    outStr->printf("<< ");
+    Dict *pageDict = pages[i].getDict();
+    for (j = 0; j < pageDict->getLength(); j++) {
+      if (j > 0)
+	outStr->printf(" ");
+      const char *key = pageDict->getKey(j);
+      Object value;
+      pageDict->getValNF(j, &value);
+      if (strcmp(key, "Parent") == 0) {
+        outStr->printf("/Parent %d 0 R", rootNum + 1);  // every page is re-parented to the new /Pages object
+      } else {
+        outStr->printf("/%s ", key);
+        PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]);
+      }
+      value.free();
+    }
+    outStr->printf(" >>\nendobj\n");
+    objectsCount++;
+  }
+  Guint uxrefOffset = outStr->getPos();
+  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */ );
+  Ref ref;
+  ref.num = rootNum;
+  ref.gen = 0;
+  PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 0,
+	&ref, yRef, fileName, outStr->getPos());
+
+  outStr->close();
+  delete outStr;  // the stream object is separate from the FILE it wraps
+  fclose(f);
+  delete yRef;
+  for (j = 0; j < (int) pages.size (); j++) pages[j].free();
+  for (i = 0; i < (int) docs.size (); i++) delete docs[i];
+  return exitCode;
+}
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index adbaebe..8c75a4e 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -107,6 +107,7 @@ void PDFDoc::init()
   startXRefPos = ~(Guint)0;
   secHdlr = NULL;
   pageCache = NULL;
+  countRef = new XRef();
 }
 
 PDFDoc::PDFDoc()
@@ -314,6 +315,9 @@ PDFDoc::~PDFDoc() {
   if (fileName) {
     delete fileName;
   }
+  if (countRef) {
+    delete countRef;
+  }
 }
 
 
@@ -573,6 +577,120 @@ Hints *PDFDoc::getHints()
   return hints;
 }
 
+int PDFDoc::savePageAs(GooString *name, int pageNo)  // writes page pageNo (1-based) as a one-page PDF to file name; returns errNone or errOpenFile
+{
+  FILE *f;
+  OutStream *outStr;
+  XRef *yRef;
+  int rootNum = getXRef()->getSize() + 1;  // new Catalog/Pages/Page objects are numbered from here
+
+  if (pageNo < 1 || pageNo > getNumPages()) {
+    error(-1, "Illegal pageNo: %d(%d)", pageNo, getNumPages() );
+    return errOpenFile;
+  }
+  PDFRectangle *cropBox = NULL;
+  if (getCatalog()->getPage(pageNo)->isCropped())
+    cropBox = getCatalog()->getPage(pageNo)->getCropBox();
+  replacePageDict(pageNo, 
+    getCatalog()->getPage(pageNo)->getRotate(),
+    getCatalog()->getPage(pageNo)->getMediaBox(),
+    cropBox, NULL);
+  Ref *refPage = getCatalog()->getPageRef(pageNo);
+  Object page;
+  getXRef()->fetch(refPage->num, refPage->gen, &page);
+
+  if (!(f = fopen(name->getCString(), "wb"))) {
+    error(-1, "Couldn't open file '%s'", name->getCString());
+    page.free();  // release the fetched page on this exit path as well
+    return errOpenFile;
+  }
+  outStr = new FileOutStream(f,0);
+
+  yRef = new XRef();
+  delete countRef;  // drop the counter table left by init() or by a previous call
+  countRef = new XRef();
+  yRef->add(0, 65535, 0, gFalse);
+  writeHeader(outStr, (double) getPDFMajorVersion () + getPDFMinorVersion() / 10.0);
+
+  // get and mark optional content groups
+  OCGs *ocgs = getCatalog()->getOptContentConfig();
+  if (ocgs != NULL) {
+    Object catDict, optContentProps;
+    getXRef()->getCatalog(&catDict);
+    catDict.dictLookup("OCProperties", &optContentProps);
+    Dict *ocDict = optContentProps.getDict();
+    markPageObjects(ocDict, yRef, 0);
+    catDict.free();
+    optContentProps.free();
+  }
+
+  Dict *pageDict = page.getDict();
+  markPageObjects(pageDict, yRef, 0);
+  Guint objectsCount = writePageObjects(outStr, yRef, 0);
+
+  yRef->add(rootNum,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum);
+  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1); 
+  if (ocgs != NULL) {
+    Object catDict, optContentProps;
+    getXRef()->getCatalog(&catDict);
+    catDict.dictLookup("OCProperties", &optContentProps);
+    outStr->printf(" /OCProperties <<");
+    Dict *ocDict = optContentProps.getDict();  // own name: must not hide the page dictionary used below
+    for (int n = 0; n < ocDict->getLength(); n++) {
+      if (n > 0) outStr->printf(" ");
+      const char *key = ocDict->getKey(n);
+      Object value; ocDict->getValNF(n, &value);
+      outStr->printf("/%s ", key);
+      writeObject(&value, NULL, outStr, getXRef(), 0);
+      value.free();
+    }
+    outStr->printf(" >> ");
+    catDict.free();
+    optContentProps.free();
+  }
+  outStr->printf(">>\nendobj\n");
+  objectsCount++;
+
+  yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 1);
+  outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 >>\nendobj\n", rootNum + 2);
+  objectsCount++;
+
+  yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 2);
+  outStr->printf("<< ");
+  for (int n = 0; n < pageDict->getLength(); n++) {
+    if (n > 0) outStr->printf(" ");
+    const char *key = pageDict->getKey(n);
+    Object value; pageDict->getValNF(n, &value);
+    if (strcmp(key, "Parent") == 0) {
+      outStr->printf("/Parent %d 0 R", rootNum + 1);  // point the page at the new single-kid /Pages object
+    } else {
+      outStr->printf("/%s ", key);
+      writeObject(&value, NULL, outStr, getXRef(), 0); 
+    }
+    value.free();
+  }
+  outStr->printf(" >>\nendobj\n");
+  objectsCount++;
+  page.free();
+
+  Guint uxrefOffset = outStr->getPos();
+  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */);
+  Ref ref;
+  ref.num = rootNum;
+  ref.gen = 0;
+  writeTrailer(uxrefOffset, objectsCount, outStr, gFalse, 0, &ref, getXRef(), name->getCString(), outStr->getPos());
+
+  outStr->close();
+  delete outStr;  // the stream object is separate from the FILE it wraps
+  fclose(f);
+  delete yRef;
+
+  return errNone;
+}
+
 int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
   FILE *f;
   OutStream *outStr;
@@ -740,7 +858,7 @@ void PDFDoc::saveCompleteRewrite (OutStream* outStr)
 
 }
 
-void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr)
+void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset)
 {
   Object obj1;
   outStr->printf("<<");
@@ -749,7 +867,7 @@ void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr)
     GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
     outStr->printf("/%s ", keyNameToPrint->getCString());
     delete keyNameToPrint;
-    writeObject(dict->getValNF(i, &obj1), NULL, outStr);
+    writeObject(dict->getValNF(i, &obj1), NULL, outStr, xRef, numOffset);
     obj1.free();
   }
   outStr->printf(">> ");
@@ -805,18 +923,23 @@ void PDFDoc::writeString (GooString* s, OutStream* outStr)
     const char* c = s->getCString();
     outStr->printf("(");
     for(int i=0; i<s->getLength(); i++) {
-      char unescaped = (*c)&0x000000ff;
+      char unescaped = *(c+i)&0x000000ff;
       //escape if needed
-      if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
-        outStr->printf("%c", '\\');
-      outStr->printf("%c", unescaped);
-      c++;
+	  if (unescaped == '\r')
+        outStr->printf("\\r");
+	  else if (unescaped == '\n')
+        outStr->printf("\\n");
+	  else {
+		  if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
+			outStr->printf("%c", '\\');
+		  outStr->printf("%c", unescaped);
+	  }
     }
     outStr->printf(") ");
   }
 }
 
-Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
+Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr, XRef *xRef, Guint numOffset)
 {
   Array *array;
   Object obj1;
@@ -858,13 +981,13 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
       array = obj->getArray();
       outStr->printf("[");
       for (int i=0; i<array->getLength(); i++) {
-        writeObject(array->getNF(i, &obj1), NULL,outStr);
+        writeObject(array->getNF(i, &obj1), NULL,outStr, xRef, numOffset);
         obj1.free();
       }
       outStr->printf("] ");
       break;
     case objDict:
-      writeDictionnary (obj->getDict(),outStr);
+      writeDictionnary (obj->getDict(),outStr, xRef, numOffset);
       break;
     case objStream: 
       {
@@ -886,7 +1009,7 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
           stream->getDict()->remove("Filter");
           stream->getDict()->remove("DecodeParms");
 
-          writeDictionnary (stream->getDict(),outStr);
+          writeDictionnary (stream->getDict(),outStr, xRef, numOffset);
           writeStream (stream,outStr);
           obj1.free();
         } else {
@@ -896,23 +1019,23 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
             BaseStream *bs = fs->getBaseStream();
             if (bs) {
               Guint streamEnd;
-                if (xref->getStreamEnd(bs->getStart(), &streamEnd)) {
+                if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
                   Object val;
                   val.initInt(streamEnd - bs->getStart());
                   stream->getDict()->set("Length", &val);
                 }
               }
           }
-          writeDictionnary (stream->getDict(), outStr);
+          writeDictionnary (stream->getDict(), outStr, xRef, numOffset);
           writeRawStream (stream, outStr);
         }
         break;
       }
     case objRef:
-      outStr->printf("%i %i R ", obj->getRef().num, obj->getRef().gen);
+      outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
       break;
     case objCmd:
-      outStr->printf("cmd\r\n");
+      outStr->printf("%s\n", obj->getCmd());
       break;
     case objError:
       outStr->printf("error\r\n");
@@ -932,15 +1055,17 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
   return offset;
 }
 
-void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate)
+void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, 
+			  OutStream* outStr, GBool incrUpdate,
+			  Guint startxRef, Ref *root, XRef *xRef, const char *fileName,
+			  Guint fileSize)
 {
-  Dict *trailerDict = new Dict(xref);
+  Dict *trailerDict = new Dict(xRef);
   Object obj1;
   obj1.initInt(uxrefSize);
   trailerDict->set("Size", &obj1);
   obj1.free();
 
-
   //build a new ID, as recommended in the reference, uses:
   // - current time
   // - file name
@@ -950,23 +1075,13 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
   char buffer[256];
   sprintf(buffer, "%i", (int)time(NULL));
   message.append(buffer);
-  if (fileName)
-    message.append(fileName);
-  else
-    message.append("streamwithoutfilename.pdf");
-  // file size
-  unsigned int fileSize = 0;
-  int c;
-  str->reset();
-  while ((c = str->getChar()) != EOF) {
-    fileSize++;
-  }
-  str->close();
+  message.append(fileName);
+
   sprintf(buffer, "%i", fileSize);
   message.append(buffer);
 
   //info dict -- only use text string
-  if (xref->getDocInfo(&obj1)->isDict()) {
+  if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
     for(int i=0; i<obj1.getDict()->getLength(); i++) {
       Object obj2;
       obj1.getDict()->getVal(i, &obj2);  
@@ -985,11 +1100,11 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
 
   //create ID array
   Object obj2,obj3,obj4,obj5;
-  obj2.initArray(xref);
+  obj2.initArray(xRef);
 
   if (incrUpdate) {
     //only update the second part of the array
-    if(xref->getTrailerDict()->getDict()->lookup("ID", &obj4) != NULL) {
+    if(xRef->getTrailerDict()->getDict()->lookup("ID", &obj4) != NULL) {
       if (!obj4.isArray()) {
         error(-1, "PDFDoc::writeTrailer original file's ID entry isn't an array. Trying to continue");
       } else {
@@ -1009,22 +1124,23 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
     trailerDict->set("ID", &obj2);
   }
 
-
-  obj1.initRef(xref->getRootNum(), xref->getRootGen());
+  obj1.initRef(root->num, root->gen);
   trailerDict->set("Root", &obj1);
 
   if (incrUpdate) { 
-    obj1.initInt(getStartXRef());
+    obj1.initInt(startxRef);
     trailerDict->set("Prev", &obj1);
   }
   
-  xref->getDocInfoNF(&obj5);
-  if (!obj5.isNull()) {
-    trailerDict->set("Info", &obj5);
+  if (!xRef->getTrailerDict()->isNone()) {
+    xRef->getDocInfoNF(&obj5);
+    if (!obj5.isNull()) {
+      trailerDict->set("Info", &obj5);
+    }
   }
   
   outStr->printf( "trailer\r\n");
-  writeDictionnary(trailerDict, outStr);
+  writeDictionnary(trailerDict, outStr, xRef, 0);
   outStr->printf( "\r\nstartxref\r\n");
   outStr->printf( "%i\r\n", uxrefOffset);
   outStr->printf( "%%%%EOF\r\n");
@@ -1032,6 +1148,221 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
   delete trailerDict;
 }
 
+void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate)  // member overload: fills in this document's name, size, root and XRef
+{
+  const char *fileNameA;  // const: may point at a string literal
+  if (fileName)
+    fileNameA = fileName->getCString();
+  else
+    fileNameA = "streamwithoutfilename.pdf";
+  // file size
+  unsigned int fileSize = 0;
+  int c;
+  str->reset();
+  while ((c = str->getChar()) != EOF) {  // size is obtained by reading the source stream to its end
+    fileSize++;
+  }
+  str->close();
+  Ref ref;
+  ref.num = getXRef()->getRootNum();
+  ref.gen = getXRef()->getRootGen();
+  writeTrailer(uxrefOffset, uxrefSize, outStr, incrUpdate, getStartXRef(), &ref, getXRef(), fileNameA, fileSize);
+}
+
+void PDFDoc::writeHeader(OutStream *outStr, double version)
+{
+   // emits the "%PDF-<version>" line, then a '%' comment line holding four bytes >= 0x80
+   outStr->printf("%%PDF-%.1f\n%%\xE2\xE3\xCF\xD3\n", version);
+}
+
+///////////////////////////////////////////////////////////////////////////
+void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, Guint numOffset)
+///////////////////////////////////////////////////////////////////////////
+// hand every value stored in dict to markObject
+///////////////////////////////////////////////////////////////////////////
+{
+  for (int idx = 0; idx < dict->getLength(); ++idx) {
+    Object entry;
+    markObject(dict->getValNF(idx, &entry), xRef, numOffset);
+    entry.free();
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////
+void PDFDoc::markObject (Object* obj, XRef *xRef, Guint numOffset)
+///////////////////////////////////////////////////////////////////////////
+// insert referenced objects in XRef
+///////////////////////////////////////////////////////////////////////////
+{
+  Array *array;
+  Object obj1;
+
+  switch (obj->getType()) {
+    case objArray:
+      array = obj->getArray();
+      for (int i=0; i<array->getLength(); i++) {  // recurse into every array element
+        markObject(array->getNF(i, &obj1), xRef, numOffset);
+        obj1.free();
+      }
+      break;
+    case objDict:
+      markDictionnary (obj->getDict(), xRef, numOffset);
+      break;
+    case objStream: 
+      {
+        Stream *stream = obj->getStream();
+		markDictionnary (stream->getDict(), xRef, numOffset);  // only the stream's dictionary is scanned
+      }
+      break;
+    case objRef:
+      {
+        if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {  // slot num+numOffset not yet used in xRef
+	  if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree)
+	    return;  // already marked as free => should be replaced
+          xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, gTrue);  // register at the shifted number, offset 0 for now
+	  if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed)
+	    xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;  // carry over the entry type from this document's XRef
+	}
+	if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() || 
+	  countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
+	  countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue);  // first visit: the gen field of countRef starts at 1
+	} else {
+	  XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
+	  entry->gen++;  // repeated visit: the gen field is used as a visit counter
+	} 
+	Object obj1;  // NOTE(review): shadows the obj1 declared at the top of the function
+	getXRef()->fetch(obj->getRef().num, obj->getRef().gen, &obj1);
+	markObject(&obj1, xRef, numOffset);  // NOTE(review): recursion runs on every visit, not only the first - confirm reference cycles cannot recurse without bound
+	obj1.free();
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////
+void PDFDoc::replacePageDict(int pageNo, int rotate,
+			PDFRectangle *mediaBox, 
+			PDFRectangle *cropBox, Object *pageCTM)
+///////////////////////////////////////////////////////////////////////////
+// rewrite pageDict with MediaBox, CropBox and new page CTM
+///////////////////////////////////////////////////////////////////////////
+{
+  Ref *refPage = getCatalog()->getPageRef(pageNo);
+  Object page;
+  getXRef()->fetch(refPage->num, refPage->gen, &page);
+  Dict *pageDict = page.getDict();
+  pageDict->remove("MediaBox");  // the six entries removed here are dropped; MediaBox, Rotate and (optionally) CropBox are re-added below
+  pageDict->remove("CropBox");
+  pageDict->remove("ArtBox");
+  pageDict->remove("BleedBox");
+  pageDict->remove("TrimBox");
+  pageDict->remove("Rotate");
+  Object *mediaBoxObj = new Object();  // NOTE(review): none of the heap Objects in this function is deleted here - confirm whether arrayAdd()/Dict::add() copy or take ownership
+  mediaBoxObj->initArray(getXRef());
+  Object *murx = new Object();  // NOTE(review): named "urx" but holds x1; the ur/ll names look swapped relative to x1,y1 / x2,y2 - presumably cosmetic
+  murx->initReal(mediaBox->x1);
+  Object *mury = new Object();
+  mury->initReal(mediaBox->y1);
+  Object *mllx = new Object();
+  mllx->initReal(mediaBox->x2);
+  Object *mlly = new Object();
+  mlly->initReal(mediaBox->y2);
+  mediaBoxObj->arrayAdd(murx);  // array order written: x1 y1 x2 y2
+  mediaBoxObj->arrayAdd(mury);
+  mediaBoxObj->arrayAdd(mllx);
+  mediaBoxObj->arrayAdd(mlly);
+  pageDict->add(copyString("MediaBox"), mediaBoxObj);
+  if (cropBox != NULL) {  // CropBox is written only when the caller supplies one
+    Object *cropBoxObj = new Object();
+    cropBoxObj->initArray(getXRef());
+    Object *curx = new Object();
+    curx->initReal(cropBox->x1);
+    Object *cury = new Object();
+    cury->initReal(cropBox->y1);
+    Object *cllx = new Object();
+    cllx->initReal(cropBox->x2);
+    Object *clly = new Object();
+    clly->initReal(cropBox->y2);
+    cropBoxObj->arrayAdd(curx);
+    cropBoxObj->arrayAdd(cury);
+    cropBoxObj->arrayAdd(cllx);
+    cropBoxObj->arrayAdd(clly);
+    pageDict->add(copyString("CropBox"), cropBoxObj);
+  }
+  Object *rotateObj = new Object();
+  rotateObj->initInt(rotate);
+  pageDict->add(copyString("Rotate"), rotateObj);
+  if (pageCTM != NULL) {  // optional: put pageCTM in front of the page's existing Contents
+    Object *contents = new Object();
+    Ref cmRef = getXRef()->addIndirectObject(pageCTM);  // pageCTM becomes a new indirect object of this document
+    Object *ref = new Object();
+    ref->initRef(cmRef.num, cmRef.gen);
+    pageDict->lookupNF("Contents", contents);
+    Object *newContents = new Object();
+    newContents->initArray(getXRef());
+    if (contents->getType() == objRef) {  // Contents is a single reference: result is [ref, contents]
+      newContents->arrayAdd(ref);
+      newContents->arrayAdd(contents);
+    } else {  // NOTE(review): assumes Contents is an array here; any other type (e.g. a missing entry) reaches arrayGetLength() unchecked
+      newContents->arrayAdd(ref);
+      for (int i = 0; i < contents->arrayGetLength(); i++) {
+	Object *contentEle = new Object();
+	contents->arrayGetNF(i, contentEle);
+	newContents->arrayAdd(contentEle);
+      }
+    }
+    pageDict->remove("Contents");
+    pageDict->add(copyString("Contents"), newContents);
+  }
+  getXRef()->setModifiedObject(&page, *refPage);  // hand the edited page object back to the XRef under the page's reference
+  page.free();
+}
+
+///////////////////////////////////////////////////////////////////////////
+void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, Guint numOffset) 
+///////////////////////////////////////////////////////////////////////////
+// pass every pageDict value except the "Parent" entry to markObject
+///////////////////////////////////////////////////////////////////////////
+{
+  for (int idx = 0; idx < pageDict->getLength(); idx++) {
+    Object entryVal;
+    pageDict->getValNF(idx, &entryVal);
+    // the Parent entry is the only key that is not followed
+    const bool isParent = strcmp(pageDict->getKey(idx), "Parent") == 0;
+    if (!isParent) {
+      markObject(&entryVal, xRef, numOffset);
+    }
+    entryVal.free();
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////
+Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset) 
+///////////////////////////////////////////////////////////////////////////
+// write every non-free entry of xRef, from numOffset on, to outStr
+///////////////////////////////////////////////////////////////////////////
+{
+  Guint written = 0; // number of objects emitted, returned to the caller
+
+  for (int num = numOffset; num < xRef->getNumObjects(); num++) {
+    if (xRef->getEntry(num)->type == xrefEntryFree)
+      continue;
+    Ref target;
+    target.num = num;
+    target.gen = xRef->getEntry(num)->gen;
+    written++;
+    // the object is fetched from this document under its unshifted number
+    Object obj;
+    getXRef()->fetch(target.num - numOffset, target.gen, &obj);
+    Guint offset = writeObject(&obj, &target, outStr, xRef, numOffset);
+    xRef->add(target.num, target.gen, offset, gTrue);
+    obj.free();
+  }
+  return written;
+}
+
 #ifndef DISABLE_OUTLINE
 Outline *PDFDoc::getOutline()
 {
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index a7113c8..246af50 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -219,6 +219,8 @@ public:
   //Return the PDF ID in the trailer dictionary (if any).
   GBool getID(GooString *permanent_id, GooString *update_id);
 
+  // Save one page with another name.
+  int savePageAs(GooString *name, int pageNo);
   // Save this file with another name.
   int saveAs(GooString *name, PDFWriteMode mode=writeStandard);
   // Save this file in the given output stream.
@@ -231,14 +233,30 @@ public:
   // Return a pointer to the GUI (XPDFCore or WinPDFCore object).
   void *getGUIData() { return guiData; }
 
+  // new writer routines
+  void replacePageDict(int pageNo, int rotate, PDFRectangle *mediaBox, PDFRectangle *cropBox, Object *pageCTM);
+  void markPageObjects(Dict *pageDict, XRef *xRef, Guint numOffset);
+  Guint writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset);
+  static Guint writeObject (Object *obj, Ref *ref, OutStream* outStr, XRef *xref, Guint numOffset);
+  static void writeHeader(OutStream *outStr, double version);
+  static void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate,
+	  Guint startxRef, Ref *root, XRef *xRef, const char *fileName, Guint fileSize);
+
 private:
+  // insert referenced objects in XRef
+  void markDictionnary (Dict* dict, XRef *xRef, Guint numOffset);
+  void markObject (Object *obj, XRef *xRef, Guint numOffset);
+  static void writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset);
+
   // Add object to current file stream and return the offset of the beginning of the object
-  Guint writeObject (Object *obj, Ref *ref, OutStream* outStr);
-  void writeDictionnary (Dict* dict, OutStream* outStr);
-  void writeStream (Stream* str, OutStream* outStr);
-  void writeRawStream (Stream* str, OutStream* outStr);
+  Guint writeObject (Object *obj, Ref *ref, OutStream* outStr)
+  { return writeObject(obj, ref, outStr, getXRef(), 0); }
+  void writeDictionnary (Dict* dict, OutStream* outStr)
+	{ writeDictionnary(dict, outStr, getXRef(), 0); }
+  static void writeStream (Stream* str, OutStream* outStr);
+  static void writeRawStream (Stream* str, OutStream* outStr);
   void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate);
-  void writeString (GooString* s, OutStream* outStr);
+  static void writeString (GooString* s, OutStream* outStr);
   void saveIncrementalUpdate (OutStream* outStr);
   void saveCompleteRewrite (OutStream* outStr);
 
@@ -283,6 +301,7 @@ private:
   int fopenErrno;
 
   Guint startXRefPos;		// offset of last xref table
+  XRef *countRef;
 };
 
 #endif
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index 04cc617..47b4c0b 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -79,3 +79,18 @@ target_link_libraries(pdftohtml ${common_libs})
 install(TARGETS pdftohtml DESTINATION bin)
 install(FILES pdftohtml.1 DESTINATION share/man/man1)
 
+# pdfextract
+set(pdfextract_SOURCES ${common_srcs}
+  pdfextract.cc
+)
+add_executable(pdfextract ${pdfextract_SOURCES})
+target_link_libraries(pdfextract ${common_libs})
+install(TARGETS pdfextract DESTINATION bin)
+
+# pdfmerge
+set(pdfmerge_SOURCES ${common_srcs}
+  pdfmerge.cc
+)
+add_executable(pdfmerge ${pdfmerge_SOURCES})
+target_link_libraries(pdfmerge ${common_libs})
+install(TARGETS pdfmerge DESTINATION bin)
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 624045e..3c6b11d 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -32,6 +32,8 @@ bin_PROGRAMS =					\
 	pdftops					\
 	pdftotext				\
 	pdftohtml				\
+	pdfextract			\
+	pdfmerge				\
 	$(pdftoppm_binary)
 
 dist_man1_MANS =				\
@@ -81,6 +83,14 @@ pdftohtml_SOURCES =				\
 	HtmlOutputDev.h				\
 	$(common)
 
+pdfextract_SOURCES =				\
+	pdfextract.cc				\
+	$(common)
+
+pdfmerge_SOURCES =				\
+	pdfmerge.cc				\
+	$(common)
+
 # Yay, automake!  It should be able to figure out that it has to dist
 # pdftoppm.1, but nooo.  So we just add it here.

_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Re: [poppler] Creating PDF with poppler ?

Reply via email to