Re: [poppler] Creating PDF with poppler ?

Thomas Freitag Mon, 20 Dec 2010 10:07:46 -0800

Hi all!

As I promised a long time ago, here is a patch for the poppler writing routines and some new helper functions, together with two small sample programs that use them.

While merging the writer routines I noticed that some of the problems I had found have been solved in the meantime. Where they are solved, I left the code as it is, but manual merging can always introduce new problems.

Besides fixing some smaller problems, the main changes are that I made most of the writer routines static and added a new parameter, numOffset. The reason is the small sample program pdfmerge, which merges several PDFs into one bigger PDF: there I have no PDFDoc object for the resulting PDF, and I have to shift the object numbers (num values) of each input PDF.

The other small sample program I added does the reverse: it extracts one or more pages from a multi-page PDF into several one-page PDFs.

I'm not an expert in configure, so I have just attached the individual main .cc files. Building the programs is similar to building pdftotext: they need parseargs and the poppler library. I would appreciate any help with this.

I think the patch, together with the samples, could make it easier to file samples for bugs, e.g. by extracting only the page or pages that cause a regression, or by merging several sample PDFs that cause regressions.

What do you think about it? Helpful or not?

Best regards,
Thomas

Am 28.10.2010 15:49, schrieb Jennings Jay:

Hi poppler folks,

I am looking for a C++ example that creates PDF files using the poppler library. I have poppler compiled (Windows XP / Visual Studio 2008) and I have working examples that *read* a PDF, but I don’t see anything in the poppler codebase, documentation, or mailing-list archives to show how to *create* a PDF. My real goal is to create a Geospatial PDF, but any kind of simple PDF creation example would probably help a lot. Thanks for the pointer.

.........................................................

Jay Jennings

Senior Software Engineer

www.geoeye.com

703.480.4614

[email protected]

21700 Atlantic Blvd. Dulles, VA 20166

Trusted Imagery Experts
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index cb3de02..5c9f35f 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -107,6 +107,7 @@ void PDFDoc::init()
   startXRefPos = ~(Guint)0;
   secHdlr = NULL;
   pageCache = NULL;
+  countRef = new XRef();
 }
 
 PDFDoc::PDFDoc()
@@ -314,6 +315,9 @@ PDFDoc::~PDFDoc() {
   if (fileName) {
     delete fileName;
   }
+  if (countRef) {
+    delete countRef;
+  }
 }
 
 
@@ -742,7 +746,7 @@ void PDFDoc::saveCompleteRewrite (OutStream* outStr)
 
 }
 
-void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr)
+void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, 
Guint numOffset)
 {
   Object obj1;
   outStr->printf("<<");
@@ -751,7 +755,7 @@ void PDFDoc::writeDictionnary (Dict* dict, OutStream* 
outStr)
     GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode 
*/);
     outStr->printf("/%s ", keyNameToPrint->getCString());
     delete keyNameToPrint;
-    writeObject(dict->getValNF(i, &obj1), NULL, outStr);
+    writeObject(dict->getValNF(i, &obj1), NULL, outStr, xRef, numOffset);
     obj1.free();
   }
   outStr->printf(">> ");
@@ -789,173 +793,257 @@ void PDFDoc::writeRawStream (Stream* str, OutStream* 
outStr)
   outStr->printf("\r\nendstream\r\n");
 }
 
-void PDFDoc::writeString (GooString* s, OutStream* outStr)
-{
-  if (s->hasUnicodeMarker()) {
-    //unicode string don't necessary end with \0
-    const char* c = s->getCString();
-    outStr->printf("(");
-    for(int i=0; i<s->getLength(); i++) {
-      char unescaped = *(c+i)&0x000000ff;
-      //escape if needed
-      if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
-        outStr->printf("%c", '\\');
-      outStr->printf("%c", unescaped);
-    }
-    outStr->printf(") ");
-  } else {
-    const char* c = s->getCString();
-    outStr->printf("(");
-    for(int i=0; i<s->getLength(); i++) {
-      char unescaped = (*c)&0x000000ff;
-      //escape if needed
-      if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
-        outStr->printf("%c", '\\');
-      outStr->printf("%c", unescaped);
-      c++;
-    }
-    outStr->printf(") ");
-  }
-}
-
-Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
-{
-  Array *array;
-  Object obj1;
-  Guint offset = outStr->getPos();
-  int tmp;
-
-  if(ref) 
-    outStr->printf("%i %i obj ", ref->num, ref->gen);
-
-  switch (obj->getType()) {
-    case objBool:
-      outStr->printf("%s ", obj->getBool()?"true":"false");
-      break;
-    case objInt:
-      outStr->printf("%i ", obj->getInt());
-      break;
-    case objReal:
-    {
-      GooString s;
-      s.appendf("{0:.10g}", obj->getReal());
-      outStr->printf("%s ", s.getCString());
-      break;
-    }
-    case objString:
-      writeString(obj->getString(), outStr);
-      break;
-    case objName:
-    {
-      GooString name(obj->getName());
-      GooString *nameToPrint = name.sanitizedName(gFalse /* non ps mode */);
-      outStr->printf("/%s ", nameToPrint->getCString());
-      delete nameToPrint;
-      break;
-    }
-    case objNull:
-      outStr->printf( "null ");
-      break;
-    case objArray:
-      array = obj->getArray();
-      outStr->printf("[");
-      for (int i=0; i<array->getLength(); i++) {
-        writeObject(array->getNF(i, &obj1), NULL,outStr);
-        obj1.free();
-      }
-      outStr->printf("] ");
-      break;
-    case objDict:
-      writeDictionnary (obj->getDict(),outStr);
-      break;
-    case objStream: 
-      {
-        //We can't modify stream with the current implementation (no write 
functions in Stream API)
-        // => the only type of streams which that have been modified are 
internal streams (=strWeird)
-        Stream *stream = obj->getStream();
-        if (stream->getKind() == strWeird) {
-          //we write the stream unencoded => TODO: write stream encoder
-          stream->reset();
-          //recalculate stream length
-          tmp = 0;
-          for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
-            tmp++;
-          }
-          obj1.initInt(tmp);
-          stream->getDict()->set("Length", &obj1);
-
-          //Remove Stream encoding
-          stream->getDict()->remove("Filter");
-          stream->getDict()->remove("DecodeParms");
-
-          writeDictionnary (stream->getDict(),outStr);
-          writeStream (stream,outStr);
-          obj1.free();
-        } else {
-          //raw stream copy
-          FilterStream *fs = dynamic_cast<FilterStream*>(stream);
-          if (fs) {
-            BaseStream *bs = fs->getBaseStream();
-            if (bs) {
-              Guint streamEnd;
-                if (xref->getStreamEnd(bs->getStart(), &streamEnd)) {
-                  Object val;
-                  val.initInt(streamEnd - bs->getStart());
-                  stream->getDict()->set("Length", &val);
-                }
-              }
-          }
-          writeDictionnary (stream->getDict(), outStr);
-          writeRawStream (stream, outStr);
-        }
-        break;
-      }
-    case objRef:
-      outStr->printf("%i %i R ", obj->getRef().num, obj->getRef().gen);
-      break;
-    case objCmd:
-      outStr->printf("cmd\r\n");
-      break;
-    case objError:
-      outStr->printf("error\r\n");
-      break;
-    case objEOF:
-      outStr->printf("eof\r\n");
-      break;
-    case objNone:
-      outStr->printf("none\r\n");
-      break;
-    default:
-      error(-1,"Unhandled objType : %i, please report a bug with a 
testcase\r\n", obj->getType());
-      break;
-  }
-  if (ref)
-    outStr->printf("endobj\r\n");
-  return offset;
-}
-
+void PDFDoc::writeString (GooString* s, OutStream* outStr)
+{
+  if (s->hasUnicodeMarker()) {
+    //unicode string don't necessary end with \0
+    const char* c = s->getCString();
+    outStr->printf("(");
+    for(int i=0; i<s->getLength(); i++) {
+      char unescaped = *(c+i)&0x000000ff;
+      //escape if needed
+      if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
+        outStr->printf("%c", '\\');
+      outStr->printf("%c", unescaped);
+    }
+    outStr->printf(") ");
+  } else {
+    const char* c = s->getCString();
+    outStr->printf("(");
+    for(int i=0; i<s->getLength(); i++) {
+      char unescaped = *(c+i)&0x000000ff;
+      //escape if needed
+         if (unescaped == '\r')
+        outStr->printf("\\r");
+         else if (unescaped == '\n')
+        outStr->printf("\\n");
+         else {
+                 if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
+                       outStr->printf("%c", '\\');
+                 outStr->printf("%c", unescaped);
+         }
+    }
+    outStr->printf(") ");
+  }
+}
+
+
+Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr, XRef 
*xRef, Guint numOffset)
+{
+  Array *array;
+  Object obj1;
+  Guint offset = outStr->getPos();
+  int tmp;
+
+  if(ref) 
+    outStr->printf("%i %i obj ", ref->num, ref->gen);
+
+  switch (obj->getType()) {
+    case objBool:
+      outStr->printf("%s ", obj->getBool()?"true":"false");
+      break;
+    case objInt:
+      outStr->printf("%i ", obj->getInt());
+      break;
+    case objReal:
+    {
+      GooString s;
+      s.appendf("{0:.10g}", obj->getReal());
+      outStr->printf("%s ", s.getCString());
+      break;
+    }
+    case objString:
+      writeString(obj->getString(), outStr);
+      break;
+    case objName:
+    {
+      GooString name(obj->getName());
+      GooString *nameToPrint = name.sanitizedName(gFalse /* non ps mode */);
+      outStr->printf("/%s ", nameToPrint->getCString());
+      delete nameToPrint;
+      break;
+    }
+    case objNull:
+      outStr->printf( "null ");
+      break;
+    case objArray:
+      array = obj->getArray();
+      outStr->printf("[");
+      for (int i=0; i<array->getLength(); i++) {
+        writeObject(array->getNF(i, &obj1), NULL,outStr, xRef, numOffset);
+        obj1.free();
+      }
+      outStr->printf("] ");
+      break;
+    case objDict:
+      writeDictionnary (obj->getDict(),outStr, xRef, numOffset);
+      break;
+    case objStream: 
+      {
+        //We can't modify stream with the current implementation (no write 
functions in Stream API)
+        // => the only type of streams which that have been modified are 
internal streams (=strWeird)
+        Stream *stream = obj->getStream();
+        if (stream->getKind() == strWeird) {
+          //we write the stream unencoded => TODO: write stream encoder
+          stream->reset();
+          //recalculate stream length
+          tmp = 0;
+          for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
+            tmp++;
+          }
+          obj1.initInt(tmp);
+          stream->getDict()->set("Length", &obj1);
+
+          //Remove Stream encoding
+          stream->getDict()->remove("Filter");
+          stream->getDict()->remove("DecodeParms");
+
+          writeDictionnary (stream->getDict(),outStr, xRef, numOffset);
+          writeStream (stream,outStr);
+          obj1.free();
+        } else {
+          //raw stream copy
+          FilterStream *fs = dynamic_cast<FilterStream*>(stream);
+          if (fs) {
+            BaseStream *bs = fs->getBaseStream();
+            if (bs) {
+              Guint streamEnd;
+                if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
+                  Object val;
+                  val.initInt(streamEnd - bs->getStart());
+                  stream->getDict()->set("Length", &val);
+                }
+              }
+          }
+          writeDictionnary (stream->getDict(), outStr, xRef, numOffset);
+          writeRawStream (stream, outStr);
+        }
+        break;
+      }
+    case objRef:
+      outStr->printf("%i %i R ", obj->getRef().num + numOffset, 
obj->getRef().gen);
+      break;
+    case objCmd:
+                 outStr->printf("%s\n", obj->getCmd());
+      break;
+    case objError:
+      outStr->printf("error\r\n");
+      break;
+    case objEOF:
+      outStr->printf("eof\r\n");
+      break;
+    case objNone:
+      outStr->printf("none\r\n");
+      break;
+    default:
+      error(-1,"Unhandled objType : %i, please report a bug with a 
testcase\r\n", obj->getType());
+      break;
+  }
+  if (ref)
+    outStr->printf("endobj\r\n");
+  return offset;
+}
+
+void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, 
+                                                         OutStream* outStr, 
GBool incrUpdate,
+                                                         Guint startxRef, Ref 
*root, XRef *xRef, const char *fileName,
+                                                         Guint fileSize)
+{
+  Dict *trailerDict = new Dict(xRef);
+  Object obj1;
+  obj1.initInt(uxrefSize);
+  trailerDict->set("Size", &obj1);
+  obj1.free();
+
+  //build a new ID, as recommended in the reference, uses:
+  // - current time
+  // - file name
+  // - file size
+  // - values of entry in information dictionnary
+  GooString message;
+  char buffer[256];
+  sprintf(buffer, "%i", (int)time(NULL));
+  message.append(buffer);
+  message.append(fileName);
+
+  sprintf(buffer, "%i", fileSize);
+  message.append(buffer);
+
+  //info dict -- only use text string
+  if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
+    for(int i=0; i<obj1.getDict()->getLength(); i++) {
+      Object obj2;
+      obj1.getDict()->getVal(i, &obj2);  
+      if (obj2.isString()) {
+        message.append(obj2.getString());
+      }
+      obj2.free();
+    }
+  }
+  obj1.free();
+
+  //calculate md5 digest
+  Guchar digest[16];
+  Decrypt::md5((Guchar*)message.getCString(), message.getLength(), digest);
+  obj1.initString(new GooString((const char*)digest, 16));
+
+  //create ID array
+  Object obj2,obj3,obj4,obj5;
+  obj2.initArray(xRef);
+
+  if (incrUpdate) {
+    //only update the second part of the array
+    if(xRef->getTrailerDict()->getDict()->lookup("ID", &obj4) != NULL) {
+      if (!obj4.isArray()) {
+        error(-1, "PDFDoc::writeTrailer original file's ID entry isn't an 
array. Trying to continue");
+      } else {
+        //Get the first part of the ID
+        obj4.arrayGet(0,&obj3); 
+
+        obj2.arrayAdd(&obj3); 
+        obj2.arrayAdd(&obj1);
+        trailerDict->set("ID", &obj2);
+      }
+    }
+  } else {
+    //new file => same values for the two identifiers
+    obj2.arrayAdd(&obj1);
+    obj1.initString(new GooString((const char*)digest, 16));
+    obj2.arrayAdd(&obj1);
+    trailerDict->set("ID", &obj2);
+  }
+
+  obj1.initRef(root->num, root->gen);
+  trailerDict->set("Root", &obj1);
+
+  if (incrUpdate) { 
+    obj1.initInt(startxRef);
+    trailerDict->set("Prev", &obj1);
+  }
+  
+  if (!xRef->getTrailerDict()->isNone()) {
+         xRef->getDocInfoNF(&obj5);
+         if (!obj5.isNull()) {
+               trailerDict->set("Info", &obj5);
+         }
+  }
+  
+  outStr->printf( "trailer\r\n");
+  writeDictionnary(trailerDict, outStr, xRef, 0);
+  outStr->printf( "\r\nstartxref\r\n");
+  outStr->printf( "%i\r\n", uxrefOffset);
+  outStr->printf( "%%%%EOF\r\n");
+
+  delete trailerDict;
+}
+
 void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* 
outStr, GBool incrUpdate)
 {
-  Dict *trailerDict = new Dict(xref);
-  Object obj1;
-  obj1.initInt(uxrefSize);
-  trailerDict->set("Size", &obj1);
-  obj1.free();
-
-
-  //build a new ID, as recommended in the reference, uses:
-  // - current time
-  // - file name
-  // - file size
-  // - values of entry in information dictionnary
-  GooString message;
-  char buffer[256];
-  sprintf(buffer, "%i", (int)time(NULL));
-  message.append(buffer);
-  if (fileName)
-    message.append(fileName);
+  char *fileName;
+  if (this->fileName)
+         fileName = this->fileName->getCString();
   else
-    message.append("streamwithoutfilename.pdf");
+         fileName = "streamwithoutfilename.pdf";
   // file size
   unsigned int fileSize = 0;
   int c;
@@ -963,77 +1051,319 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int 
uxrefSize, OutStream* outStr,
   while ((c = str->getChar()) != EOF) {
     fileSize++;
   }
-  str->close();
-  sprintf(buffer, "%i", fileSize);
-  message.append(buffer);
-
-  //info dict -- only use text string
-  if (xref->getDocInfo(&obj1)->isDict()) {
-    for(int i=0; i<obj1.getDict()->getLength(); i++) {
-      Object obj2;
-      obj1.getDict()->getVal(i, &obj2);  
-      if (obj2.isString()) {
-        message.append(obj2.getString());
-      }
-      obj2.free();
-    }
-  }
-  obj1.free();
-
-  //calculate md5 digest
-  Guchar digest[16];
-  Decrypt::md5((Guchar*)message.getCString(), message.getLength(), digest);
-  obj1.initString(new GooString((const char*)digest, 16));
-
-  //create ID array
-  Object obj2,obj3,obj4,obj5;
-  obj2.initArray(xref);
-
-  if (incrUpdate) {
-    //only update the second part of the array
-    if(xref->getTrailerDict()->getDict()->lookup("ID", &obj4) != NULL) {
-      if (!obj4.isArray()) {
-        error(-1, "PDFDoc::writeTrailer original file's ID entry isn't an 
array. Trying to continue");
-      } else {
-        //Get the first part of the ID
-        obj4.arrayGet(0,&obj3); 
-
-        obj2.arrayAdd(&obj3); 
-        obj2.arrayAdd(&obj1);
-        trailerDict->set("ID", &obj2);
-      }
-    }
-  } else {
-    //new file => same values for the two identifiers
-    obj2.arrayAdd(&obj1);
-    obj1.initString(new GooString((const char*)digest, 16));
-    obj2.arrayAdd(&obj1);
-    trailerDict->set("ID", &obj2);
-  }
-
-
-  obj1.initRef(xref->getRootNum(), xref->getRootGen());
-  trailerDict->set("Root", &obj1);
-
-  if (incrUpdate) { 
-    obj1.initInt(getStartXRef());
-    trailerDict->set("Prev", &obj1);
-  }
-  
-  xref->getDocInfoNF(&obj5);
-  if (!obj5.isNull()) {
-    trailerDict->set("Info", &obj5);
-  }
-  
-  outStr->printf( "trailer\r\n");
-  writeDictionnary(trailerDict, outStr);
-  outStr->printf( "\r\nstartxref\r\n");
-  outStr->printf( "%i\r\n", uxrefOffset);
-  outStr->printf( "%%%%EOF\r\n");
-
-  delete trailerDict;
+  str->close();
+  Ref ref;
+  ref.num = getXRef()->getRootNum();
+  ref.gen = getXRef()->getRootGen();
+  writeTrailer(uxrefOffset, uxrefSize, outStr, incrUpdate, getStartXRef(), 
&ref, getXRef(), fileName, fileSize);
 }
 
+void PDFDoc::writeHeader(OutStream *outStr, double version)
+{
+   outStr->printf("%%PDF-%.1f\n", version);
+   outStr->printf("%%\xE2\xE3\xCF\xD3\n");
+}
+
+///////////////////////////////////////////////////////////////////////////
+void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, Guint numOffset)
+///////////////////////////////////////////////////////////////////////////
+// insert referenced objects in dictionnary in XRef
+///////////////////////////////////////////////////////////////////////////
+{
+  Object obj1;
+  for (int i=0; i<dict->getLength(); i++) {
+    markObject(dict->getValNF(i, &obj1), xRef, numOffset);
+    obj1.free();
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////
+void PDFDoc::markObject (Object* obj, XRef *xRef, Guint numOffset)
+///////////////////////////////////////////////////////////////////////////
+// insert referenced objects in XRef
+///////////////////////////////////////////////////////////////////////////
+{
+  Array *array;
+  Object obj1;
+
+  switch (obj->getType()) {
+    case objArray:
+      array = obj->getArray();
+      for (int i=0; i<array->getLength(); i++) {
+        markObject(array->getNF(i, &obj1), xRef, numOffset);
+        obj1.free();
+      }
+      break;
+    case objDict:
+      markDictionnary (obj->getDict(), xRef, numOffset);
+      break;
+    case objStream: 
+      {
+        Stream *stream = obj->getStream();
+               markDictionnary (stream->getDict(), xRef, numOffset);
+        break;
+      }
+    case objRef:
+         if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || 
xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
+                 if (getXRef()->getEntry(obj->getRef().num)->type == 
xrefEntryFree)
+                         return;  // already marked as free => should be 
replaced
+                 xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 
0, gTrue);
+                 if (getXRef()->getEntry(obj->getRef().num)->type == 
xrefEntryCompressed)
+                         xRef->getEntry(obj->getRef().num + numOffset)->type = 
xrefEntryCompressed;
+         }
+         if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() 
|| 
+                 countRef->getEntry(obj->getRef().num + numOffset)->type == 
xrefEntryFree) {
+                 countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue);
+         } else {
+                 XRefEntry *entry = countRef->getEntry(obj->getRef().num + 
numOffset);
+                 entry->gen++;
+         } 
+         {
+                 Object obj1;
+                 this->getXRef()->fetch(obj->getRef().num, obj->getRef().gen, 
&obj1);
+                 markObject(&obj1, xRef, numOffset);
+                 obj1.free();
+         }
+         break;
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////
+void PDFDoc::ReplacePageDict(int pageNo, int rotate,
+                                                               PDFRectangle 
*mediaBox, 
+                                                               PDFRectangle 
*cropBox, Object *pageCTM)
+///////////////////////////////////////////////////////////////////////////
+// rewrite pageDict with MediaBox, CropBox and new page CTM
+///////////////////////////////////////////////////////////////////////////
+{
+       Ref *refPage = getCatalog()->getPageRef(pageNo);
+       Object page;
+       getXRef()->fetch(refPage->num, refPage->gen, &page);
+       Dict *pageDict = page.getDict();
+       pageDict->remove("MediaBox");
+       pageDict->remove("CropBox");
+       pageDict->remove("ArtBox");
+       pageDict->remove("BleedBox");
+       pageDict->remove("TrimBox");
+       pageDict->remove("Rotate");
+       Object *mediaBoxObj = new Object();
+       mediaBoxObj->initArray(getXRef());
+       Object *murx = new Object();
+       murx->initReal(mediaBox->x1);
+       Object *mury = new Object();
+       mury->initReal(mediaBox->y1);
+       Object *mllx = new Object();
+       mllx->initReal(mediaBox->x2);
+       Object *mlly = new Object();
+       mlly->initReal(mediaBox->y2);
+       mediaBoxObj->arrayAdd(murx);
+       mediaBoxObj->arrayAdd(mury);
+       mediaBoxObj->arrayAdd(mllx);
+       mediaBoxObj->arrayAdd(mlly);
+       pageDict->add(copyString("MediaBox"), mediaBoxObj);
+       if (cropBox != NULL) {
+               Object *cropBoxObj = new Object();
+               cropBoxObj->initArray(getXRef());
+               Object *curx = new Object();
+               curx->initReal(cropBox->x1);
+               Object *cury = new Object();
+               cury->initReal(cropBox->y1);
+               Object *cllx = new Object();
+               cllx->initReal(cropBox->x2);
+               Object *clly = new Object();
+               clly->initReal(cropBox->y2);
+               cropBoxObj->arrayAdd(curx);
+               cropBoxObj->arrayAdd(cury);
+               cropBoxObj->arrayAdd(cllx);
+               cropBoxObj->arrayAdd(clly);
+               pageDict->add(copyString("CropBox"), cropBoxObj);
+       }
+       Object *rotateObj = new Object();
+       rotateObj->initInt(rotate);
+       pageDict->add(copyString("Rotate"), rotateObj);
+       if (pageCTM != NULL) {
+               Object *contents = new Object();
+               Ref cmRef = getXRef()->addIndirectObject(pageCTM);
+               Object *ref = new Object();
+               ref->initRef(cmRef.num, cmRef.gen);
+               pageDict->lookupNF("Contents", contents);
+               Object *newContents = new Object();
+               newContents->initArray(getXRef());
+               if (contents->getType() == objRef) {
+                       newContents->arrayAdd(ref);
+                       newContents->arrayAdd(contents);
+               } else {
+                       newContents->arrayAdd(ref);
+                       for (int i = 0; i < contents->arrayGetLength(); i++) {
+                               Object *contentEle = new Object();
+                               contents->arrayGetNF(i, contentEle);
+                               newContents->arrayAdd(contentEle);
+                       }
+               }
+               pageDict->remove("Contents");
+               pageDict->add(copyString("Contents"), newContents);
+       }
+       getXRef()->setModifiedObject(&page, *refPage);
+       page.free();
+}
+
+///////////////////////////////////////////////////////////////////////////
+void PDFDoc::MarkPageObjects(Dict *pageDict, XRef *xRef, Guint numOffset) 
+///////////////////////////////////////////////////////////////////////////
+// mark all objects referenced by pageDict (except Parent) in xRef
+///////////////////////////////////////////////////////////////////////////
+{
+   int n;
+
+   for (n = 0; n < pageDict->getLength(); n++) {
+          const char *key = pageDict->getKey(n);
+          Object value; pageDict->getValNF(n, &value);
+          if (strcmp(key, "Parent") != 0) {
+                  markObject(&value, xRef, numOffset);
+          }
+          value.free();
+   }
+}
+
+///////////////////////////////////////////////////////////////////////////
+Guint PDFDoc::WritePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset) 
+///////////////////////////////////////////////////////////////////////////
+// write all non-free objects of xRef (from numOffset on) to outStr; returns their count
+///////////////////////////////////////////////////////////////////////////
+{
+   int n;
+   Guint objectsCount = 0; //count the number of objects in the XRef(s)
+
+   for (n=numOffset; n < xRef->getNumObjects(); n++) {
+          if (xRef->getEntry(n)->type != xrefEntryFree) {
+                  Object obj;
+                  Ref ref;
+                  ref.num = n;
+                  ref.gen = xRef->getEntry(n)->gen;
+                  objectsCount++;
+                  this->getXRef()->fetch(ref.num - numOffset, ref.gen, &obj);
+                  Guint offset = writeObject(&obj, &ref, outStr, xRef, 
numOffset);
+                  xRef->add(ref.num, ref.gen, offset, gTrue);
+                  obj.free();
+          }
+   }
+   return objectsCount;
+}
+
+GBool PDFDoc::ExtractPage(const char * fileName, int pageNo) 
+{
+   FILE *f;
+   OutStream *outStr;
+   XRef *yRef;
+   int n;
+   int rootNum = getXRef()->getSize() + 1;
+
+   if (pageNo < 1 || pageNo > this->getNumPages()) {
+          error(-1, "Illegal pageNo: %d(%d)", pageNo, this->getNumPages() );
+          return false;
+   }
+   PDFRectangle *cropBox = NULL;
+   if (getCatalog()->getPage(pageNo)->isCropped())
+          cropBox = getCatalog()->getPage(pageNo)->getCropBox();
+   ReplacePageDict(pageNo, 
+          getCatalog()->getPage(pageNo)->getRotate(),
+          getCatalog()->getPage(pageNo)->getMediaBox(),
+          cropBox, NULL);
+   Ref *refPage = this->getCatalog()->getPageRef(pageNo);
+   Object page;
+   this->getXRef()->fetch(refPage->num, refPage->gen, &page);
+
+   if (!(f = fopen(fileName, "wb"))) {
+          error(-1, "Couldn't open file '%s'", fileName);
+          return false;
+   }
+   outStr = new FileOutStream(f,0);
+
+   yRef = new XRef();
+   countRef = new XRef();
+   yRef->add(0, 65535, 0, gFalse);
+   writeHeader(outStr, (double) getPDFMajorVersion () + getPDFMinorVersion() / 
10.0);
+
+   // get and mark optional content groups
+   OCGs *ocgs = getCatalog()->getOptContentConfig();
+   if (ocgs != NULL) {
+          Object catDict, optContentProps;
+          getXRef()->getCatalog(&catDict);
+          catDict.dictLookup("OCProperties", &optContentProps);
+          Dict *pageDict = optContentProps.getDict();
+          MarkPageObjects(pageDict, yRef, 0);
+          catDict.free();
+          optContentProps.free();
+   }
+
+   Dict *pageDict = page.getDict();
+   MarkPageObjects(pageDict, yRef, 0);
+   Guint objectsCount = WritePageObjects(outStr, yRef, 0);
+
+   yRef->add(rootNum,0,outStr->getPos(),gTrue);
+   outStr->printf("%d 0 obj\n", rootNum);
+   outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1); 
+   if (ocgs != NULL) {
+          Object catDict, optContentProps;
+          getXRef()->getCatalog(&catDict);
+          catDict.dictLookup("OCProperties", &optContentProps);
+          outStr->printf(" /OCProperties <<");
+          Dict *pageDict = optContentProps.getDict();
+          for (n = 0; n < pageDict->getLength(); n++) {
+                  if (n > 0) outStr->printf(" ");
+                  const char *key = pageDict->getKey(n);
+                  Object value; pageDict->getValNF(n, &value);
+                  outStr->printf("/%s ", key);
+                  writeObject(&value, NULL, outStr, getXRef(), 0);
+                  value.free();
+          }
+          outStr->printf(" >> ");
+          catDict.free();
+          optContentProps.free();
+   }
+   outStr->printf(">>\nendobj\n");
+   objectsCount++;
+
+   yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
+   outStr->printf("%d 0 obj\n", rootNum + 1);
+   outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 >>\n", rootNum + 
2);
+   outStr->printf("endobj\n");
+   objectsCount++;
+
+   yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
+   outStr->printf("%d 0 obj\n", rootNum + 2);
+   outStr->printf("<< ");
+   for (n = 0; n < pageDict->getLength(); n++) {
+          if (n > 0) outStr->printf(" ");
+          const char *key = pageDict->getKey(n);
+          Object value; pageDict->getValNF(n, &value);
+          if (strcmp(key, "Parent") == 0) {
+                  outStr->printf("/Parent %d 0 R", rootNum + 1);
+          } else {
+                  outStr->printf("/%s ", key);
+              writeObject(&value, NULL, outStr, getXRef(), 0); 
+          }
+          value.free();
+   }
+   outStr->printf(" >>\nendobj\n");
+   objectsCount++;
+   page.free();
+
+   Guint uxrefOffset = outStr->getPos();
+   yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */);
+
+   Ref ref;
+   ref.num = rootNum;
+   ref.gen = 0;
+   writeTrailer(uxrefOffset, objectsCount, outStr, gFalse, 0, &ref, getXRef(), 
fileName, outStr->getPos());
+
+   outStr->close();
+   fclose(f);
+   delete yRef;
+
+   return true;
+}
+
 #ifndef DISABLE_OUTLINE
 Outline *PDFDoc::getOutline()
 {
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index a7113c8..95c1462 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -231,14 +231,31 @@ public:
   // Return a pointer to the GUI (XPDFCore or WinPDFCore object).
   void *getGUIData() { return guiData; }
 
+  // new writer routines
+  void ReplacePageDict(int pageNo, int rotate, PDFRectangle *mediaBox, 
PDFRectangle *cropBox, Object *pageCTM);
+  GBool ExtractPage(const char * fileName, int pageNo);
+  void MarkPageObjects(Dict *pageDict, XRef *xRef, Guint numOffset);
+  Guint WritePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset);
+  static Guint writeObject (Object *obj, Ref *ref, OutStream* outStr, XRef 
*xref, Guint numOffset);
+  static void writeHeader(OutStream *outStr, double version);
+  static void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* 
outStr, GBool incrUpdate,
+         Guint startxRef, Ref *root, XRef *xRef, const char *fileName, Guint 
fileSize);
+
 private:
+  // insert referenced objects in XRef
+  void markDictionnary (Dict* dict, XRef *xRef, Guint numOffset);
+  void markObject (Object *obj, XRef *xRef, Guint numOffset);
+  static void writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, 
Guint numOffset);
+
   // Add object to current file stream and return the offset of the beginning 
of the object
-  Guint writeObject (Object *obj, Ref *ref, OutStream* outStr);
-  void writeDictionnary (Dict* dict, OutStream* outStr);
-  void writeStream (Stream* str, OutStream* outStr);
-  void writeRawStream (Stream* str, OutStream* outStr);
-  void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, 
GBool incrUpdate);
-  void writeString (GooString* s, OutStream* outStr);
+  Guint writeObject (Object *obj, Ref *ref, OutStream* outStr)
+  { return writeObject(obj, ref, outStr, getXRef(), 0); }
+  void writeDictionnary (Dict* dict, OutStream* outStr)
+       { writeDictionnary(dict, outStr, getXRef(), 0); }
+  static void writeStream (Stream* str, OutStream* outStr);
+  static void writeRawStream (Stream* str, OutStream* outStr);
+  void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, 
GBool incrUpdate);
+  static void writeString (GooString* s, OutStream* outStr);
   void saveIncrementalUpdate (OutStream* outStr);
   void saveCompleteRewrite (OutStream* outStr);
 
@@ -283,6 +300,8 @@ private:
   int fopenErrno;
 
   Guint startXRefPos;          // offset of last xref table
+  XRef *countRef;
+
 };
 
 #endif

#include "config.h"
#include <poppler-config.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include "parseargs.h"
#include "goo/GooString.h"
#include "PDFDoc.h"

// Option state for pdfextract. The ArgDesc table below stores the address
// of each variable. ExtractPages() treats a page value of 0 as "not given".
static GBool printHelp = gFalse;
static GBool printVersion = gFalse;
static int firstPage = 0;
static int lastPage = 0;

// Option table passed to parseArgs() and printUsage() in main().
// Columns: switch, argument kind, destination variable, size, usage text.
// The trailing {NULL} entry terminates the table.
static const ArgDesc argDesc[] = {
  {"-f",     argInt,  &firstPage,    0, "first page to extract"},
  {"-l",     argInt,  &lastPage,     0, "last page to extract"},
  {"-v",     argFlag, &printVersion, 0, "print copyright and version info"},
  {"-h",     argFlag, &printHelp,    0, "print usage information"},
  {"-help",  argFlag, &printHelp,    0, "print usage information"},
  {"--help", argFlag, &printHelp,    0, "print usage information"},
  {"-?",     argFlag, &printHelp,    0, "print usage information"},
  {NULL}
};

bool ExtractPages(const char * srcFileName, const char *destFileName) {
        char pathName[1024];

        GooString *gfileName = new GooString(srcFileName);
        PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL);

        if (firstPage == 0 && lastPage == 0) {
                firstPage = 1;
                lastPage = doc->getNumPages();
        }
        if (lastPage == 0)
                lastPage = firstPage;
        if (firstPage == 0)
                firstPage = 1;
        for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
                sprintf(pathName, destFileName, pageNo);
                if (!doc->ExtractPage(pathName, pageNo))
                        return false;
        }
        return true;
}

int main(int argc, char *argv[]) {
  Object info;
  GBool ok;
  int exitCode;

  exitCode = 99;

  // parse args
  ok = parseArgs(argDesc, &argc, argv);
  if (!ok || argc != 3 || printVersion || printHelp) {
    fprintf(stderr, "pdfextract version %s\n", PACKAGE_VERSION);
    fprintf(stderr, "%s\n", popplerCopyright);
    fprintf(stderr, "%s\n", xpdfCopyright);
    if (!printVersion) {
      printUsage("pdfextract", "<PDF-sourcefile> <PDF-pattern-destfile>", 
argDesc);
    }
    if (printVersion || printHelp)
      exitCode = 0;
    goto err0;
  }
  ExtractPages(argv[1], argv[2]);

 err0:

  return exitCode;
}

#include <GooVector.h>
#include <PDFDoc.h>
#include "parseargs.h"
#include "config.h"
#include <poppler-config.h>

// Flag variables for pdfmerge; the ArgDesc table below stores their addresses.
static GBool printHelp = gFalse;
static GBool printVersion = gFalse;

// Option table for pdfmerge (also passed to printUsage() in main()).
// Columns: switch, argument kind, destination variable, size, usage text.
// The trailing {NULL} entry terminates the table.
static const ArgDesc argDesc[] = {
  {"-v",     argFlag, &printVersion, 0, "print copyright and version info"},
  {"-h",     argFlag, &printHelp,    0, "print usage information"},
  {"-help",  argFlag, &printHelp,    0, "print usage information"},
  {"--help", argFlag, &printHelp,    0, "print usage information"},
  {"-?",     argFlag, &printHelp,    0, "print usage information"},
  {NULL}
};

///////////////////////////////////////////////////////////////////////////
int main(int argc, char *argv[])
///////////////////////////////////////////////////////////////////////////
// Merge PDF files given by arguments 1 to argc-2 and write the result
// to the file specified by argument argc-1.
///////////////////////////////////////////////////////////////////////////
{
        int objectsCount = 0;
        Guint numOffset = 0;
        GooVector<Object> pages;
        GooVector<Guint> offsets;
        XRef *yRef;
        FILE *f;
        OutStream *outStr;
        int i;
        int j, rootNum; 
        GooVector<PDFDoc *> docs;
        double version = 0;
        char *fileName = argv[argc-1];
        int exitCode;
        
        exitCode = 99;
        if (argc <= 3 || printVersion || printHelp) {
                fprintf(stderr, "pdfmerge version %s\n", PACKAGE_VERSION);
                fprintf(stderr, "%s\n", popplerCopyright);
                fprintf(stderr, "%s\n", xpdfCopyright);
                if (!printVersion) {
                  printUsage("pdfmerge", 
"<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>", argDesc);
                }
                if (printVersion || printHelp)
                  exitCode = 0;
                goto err0;
        }
        exitCode = 0;

        for (i = 1; i < argc-1; i++) {
                GooString *gfileName = new GooString(argv[i]);
                PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL);
                if (doc->isOk()) {
                        docs.push_back(doc);
                        if ((doc->getPDFMajorVersion () + 
doc->getPDFMinorVersion() / 10.0) > version)
                                version = doc->getPDFMajorVersion () + 
doc->getPDFMinorVersion() / 10.0;
                        continue;
                }
                break;
        }

        if (!(f = fopen(fileName, "wb"))) {
                error(-1, "Couldn't open file '%s'", fileName);
                return -1;
        }
        outStr = new FileOutStream(f,0);
        
        yRef = new XRef();
        yRef->add(0, 65535, 0, gFalse);
        PDFDoc::writeHeader(outStr, version);
        
        for (i = 0; i < (int) docs.size(); i++) {
                for (j=1; j <= docs[i]->getNumPages(); j++) {
                   PDFRectangle *cropBox = NULL;
//                 docs[i]->countRef = new XRef();
                   if (docs[i]->getCatalog()->getPage(j)->isCropped())
                           cropBox = 
docs[i]->getCatalog()->getPage(j)->getCropBox();
                   docs[i]->ReplacePageDict(j, 
                           docs[i]->getCatalog()->getPage(j)->getRotate(),
                           docs[i]->getCatalog()->getPage(j)->getMediaBox(),
                           cropBox, NULL);
                        Ref *refPage = docs[i]->getCatalog()->getPageRef(j);
                        Object page;
                        docs[i]->getXRef()->fetch(refPage->num, refPage->gen, 
&page);
                        pages.push_back(page);
                        offsets.push_back(numOffset);
                        Dict *pageDict = page.getDict();
                        docs[i]->MarkPageObjects(pageDict, yRef, numOffset);
                }
                objectsCount += docs[i]->WritePageObjects(outStr, yRef, 
numOffset);
                numOffset = yRef->getNumObjects() + 1;
        }

        rootNum = yRef->getNumObjects() + 1;
        yRef->add(rootNum,0,outStr->getPos(),gTrue);
        outStr->printf("%d 0 obj\n", rootNum);
        outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1); 
        outStr->printf(">>\nendobj\n");
        objectsCount++;
                
        yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
        outStr->printf("%d 0 obj\n", rootNum + 1);
        outStr->printf("<< /Type /Pages /Kids [");
        for (j = 0; j < (int) pages.size(); j++)
                outStr->printf(" %d 0 R", rootNum + j + 2);
        outStr->printf(" ] /Count %d >>\nendobj\n", pages.size());
        objectsCount++;

        for (i = 0; i < (int) pages.size(); i++) {
                yRef->add(rootNum + i + 2,0,outStr->getPos(),gTrue);
                outStr->printf("%d 0 obj\n", rootNum  + i + 2);
                outStr->printf("<< ");
                Dict *pageDict = pages[i].getDict();
                for (j = 0; j < pageDict->getLength(); j++) {
                        if (j > 0) outStr->printf(" ");
                        const char *key = pageDict->getKey(j);
                        Object value; pageDict->getValNF(j, &value);
                        if (strcmp(key, "Parent") == 0) {
                                outStr->printf("/Parent %d 0 R", rootNum + 1);
                        } else {
                                outStr->printf("/%s ", key);
                                PDFDoc::writeObject(&value, NULL, outStr, yRef, 
offsets[i]);
                        }
                        value.free();
                }
                outStr->printf(" >>\nendobj\n");
                objectsCount++;
        }
        Guint uxrefOffset = outStr->getPos();
        yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries 
*/);

        Ref ref;
        ref.num = rootNum;
        ref.gen = 0;
        PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 
0, &ref, yRef, fileName, outStr->getPos());

        outStr->close();
        fclose(f);
        delete yRef;
        for (j = 0; j < (int) pages.size(); j++) {
                pages[j].free();
        }
        for (i = 0; i < (int) docs.size(); i++) {
                delete docs[i];
        }
 err0:

  return exitCode;
}

_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Re: [poppler] Creating PDF with poppler ?

Reply via email to