On 07/01/13 21:09, [email protected] wrote:
> void (*progressCbk)(int pageNum, float progressPct, void* user_data)
> = NULL, void *progressCbkData = NULL );
I would use a double for the progress.
>> * The DummyOutputDev
NullOutputDev would be a better name. But I would prefer the pre-
rendering step be avoided.
> On 06/01/13 18:35, Adrian Johnson <[email protected]> wrote:
>> Have you got a sample PDF we can test with? It would be interesting
>> to see if there is anything that can be done to speed up the
>> rendering.
>
> https://dl.dropbox.com/u/16106653/pdfToImage-progress-example.zip
>
> Note: the PDFs in this zip will probably take a very, very long time
> to render with standard desktop PDF viewers. The first one
> 2012_12_28_13_6_22.pdf (generated by Cairo) is reasonably well
> behaved. The second one 2012_12_28_13_6_22_0,5stroke-1.pdf (generated
> by loading 2012_12_28_13_6_22.pdf into Adobe Illustrator and
> modifying the stroke width for all of the lines) seems to have
> multiple embedded streams, but is typical of the documents I was
> working with in making this patch.
>
> After compiling pdfToImage-progress.cpp, try: ./a.out -i
> 2012_12_28_13_6_22.pdf -o test.png --width 12000 (12000 pixels is the
> image size necessary to produce a ~100cm wide print at 300DPI) (This
> is a direct output from an openFrameworks application that draws
> several thousand line segments every frame, running at 60fps for a
> minute or two)
>
> For a poor performance example, try ./a.out -i
> 2012_12_28_13_6_22_0,5stroke-1.pdf -o test.png --width 3508 (3508
> pixels wide is a DIN-A4 print at 300DPI) (This is generated from the
> same data file as above, after loading it into Adobe Illustrator,
> selecting all the lines and setting their stroke width to 0.5 -- yes,
> there are better ways to achieve this than via a GUI application, but
> this is the use case I am working from.)
I timed both pdfs with splash and cairo using a width/height of
12000/9000 for both. The results (in mins:seconds) are interesting:
                                      splash    cairo
2012_12_28_13_6_22.pdf                  4:47     5:29
2012_12_28_13_6_22_0,5stroke-1.pdf    276:40    11:01
The second pdf contains each operation in a separate transparency group
which splash handles poorly. A more efficient way to change the line
width from 1 to 0.5 would be to uncompress the file with pdftk, replace
the "1 w" with "0.5 w" then run it through pdftk again to fix up the
xref. The process could be scripted.
>> Instead of parsing the content twice to get the operation count
>> you could use the current position in the content stream to report
>> progress. You would have to use the compressed stream position
>> (getBaseStream()->getLength()) since the uncompressed length of a
>> stream is not stored in the pdf file.
>
> I initially attempted that. With the
> 2012_12_28_13_6_22_0,5stroke-1.pdf from the zip linked above, it
> seems there are multiple streams embedded in the PDF, as the stream
> position pointer keeps on jumping back to zero. I'm assuming there
> are multiple streams, which means you'd need to do a preprocessing
> step which involves looping through the PDF, adding up all the stream
> lengths, and then actually rendering .... which is exactly what I've
> done with this patch, but counting operations rather than stream
> lengths.
See the attached patch which implements the stream position tracking.
There is a big difference between the two approaches. Counting the operations requires
uncompressing and parsing the entire stream. The attached patch just
adds up the length of each stream. As the stream length is already
available in the stream objects passed to Gfx, no processing effort is
incurred to extract the lengths.
diff --git a/poppler/Gfx.cc b/poppler/Gfx.cc
index 8891c31..de89986 100644
--- a/poppler/Gfx.cc
+++ b/poppler/Gfx.cc
@@ -666,6 +666,9 @@ void Gfx::display(Object *obj, GBool topLevel) {
Object obj2;
int i;
+ if (topLevel) {
+ progressTotal = 0;
+ }
if (obj->isArray()) {
for (i = 0; i < obj->arrayGetLength(); ++i) {
obj->arrayGet(i, &obj2);
@@ -673,12 +676,18 @@ void Gfx::display(Object *obj, GBool topLevel) {
error(errSyntaxError, -1, "Weird page contents");
obj2.free();
return;
+ } else if (topLevel) {
+ BaseStream *str = obj2.getStream()->getBaseStream();
+ progressTotal += str->getLength();
}
obj2.free();
}
} else if (!obj->isStream()) {
error(errSyntaxError, -1, "Weird page contents");
return;
+ } else if (topLevel) {
+ BaseStream *str = obj->getStream()->getBaseStream();
+ progressTotal += str->getLength();
}
parser = new Parser(xref, new Lexer(xref, obj), gFalse);
// on topLevel, reset operationCount to 0
@@ -720,6 +729,10 @@ void Gfx::go(GBool topLevel) {
// Run the operation
execOp(&obj, args, numArgs);
+ if (topLevel) {
+ callProgressCallback(parser->getBaseStreamPos());
+ }
+
// Update the profile information
if (profileCommands) {
GooHash *hash;
@@ -860,7 +873,6 @@ void Gfx::execOp(Object *cmd, Object args[], int numArgs) {
// do it
opCount++;
- callProgressCallback();
(this->*op->func)(argPtr, numArgs);
}
@@ -5386,9 +5398,9 @@ void Gfx::setProgressCallback( void (*progressCbk)(int pageNum, float progressPc
progressCallback = progressCbk;
}
-void Gfx::callProgressCallback()
+void Gfx::callProgressCallback(long pos)
{
- if ( progressCallback && progressCallbackTotalOperations>0 )
- progressCallback( progressCallbackPageNum, (float)((double)opCount/progressCallbackTotalOperations), progressCallbackUserData );
+ if (progressCallback && progressTotal > 0)
+ progressCallback(progressCallbackPageNum, (float)pos/progressTotal, progressCallbackUserData);
}
diff --git a/poppler/Gfx.h b/poppler/Gfx.h
index 895be8b..857797b 100644
--- a/poppler/Gfx.h
+++ b/poppler/Gfx.h
@@ -220,6 +220,7 @@ private:
int updateLevel;
unsigned long opCount; // total opCount since the first call to display() with topLevel=true
+ int progressTotal;
GfxState *state; // current graphics state
int stackHeight; // the height of the current graphics stack
@@ -243,7 +244,7 @@ private:
// call the progress callback, or just return if we don't have one
- void callProgressCallback();
+ void callProgressCallback(long pos);
void (*progressCallback)(int pageNum, float progressPct, void* userData);
void* progressCallbackUserData;
int progressCallbackPageNum;
diff --git a/poppler/Lexer.cc b/poppler/Lexer.cc
index 01b730b..df245fa 100644
--- a/poppler/Lexer.cc
+++ b/poppler/Lexer.cc
@@ -79,6 +79,7 @@ Lexer::Lexer(XRef *xrefA, Stream *str) {
strPtr = 0;
freeArray = gTrue;
curStr.streamReset();
+ curStrTotal = 0;
}
Lexer::Lexer(XRef *xrefA, Object *obj) {
@@ -100,6 +101,7 @@ Lexer::Lexer(XRef *xrefA, Object *obj) {
streams->get(strPtr, &curStr);
curStr.streamReset();
}
+ curStrTotal = 0;
}
Lexer::~Lexer() {
@@ -126,6 +128,8 @@ int Lexer::getChar(GBool comesFromLook) {
if (comesFromLook == gTrue) {
return EOF;
} else {
+ BaseStream *base = curStr.getStream()->getBaseStream();
+ curStrTotal += base->getLength();
curStr.streamClose();
curStr.free();
++strPtr;
@@ -606,3 +610,13 @@ void Lexer::skipToNextLine() {
GBool Lexer::isSpace(int c) {
return c >= 0 && c <= 0xff && specialChars[c] == 1;
}
+
+int Lexer::getBaseStreamPos()
+{
+ if (curStr.isStream()) {
+ BaseStream *base = curStr.getStream()->getBaseStream();
+ return curStrTotal + base->getPos() - base->getStart();
+ } else {
+ return curStrTotal;
+ }
+}
diff --git a/poppler/Lexer.h b/poppler/Lexer.h
index 284479d..00d7ec1 100644
--- a/poppler/Lexer.h
+++ b/poppler/Lexer.h
@@ -78,6 +78,8 @@ public:
// Returns true if <c> is a whitespace character.
static GBool isSpace(int c);
+ // Get cumulative relative position in the array of content streams
+ int getBaseStreamPos();
// often (e.g. ~30% on PDF Refernce 1.6 pdf file from Adobe site) getChar
// is called right after lookChar. In order to avoid expensive re-doing
@@ -98,6 +100,7 @@ private:
Object curStr; // current stream
GBool freeArray; // should lexer free the streams array?
char tokBuf[tokBufSize]; // temporary token buffer
+ int curStrTotal; // total of all streams up to current stream
XRef *xref;
};
diff --git a/poppler/Page.cc b/poppler/Page.cc
index 110005c..614b4d6 100644
--- a/poppler/Page.cc
+++ b/poppler/Page.cc
@@ -527,14 +527,16 @@ void Page::displaySlice(OutputDev *out, double hDPI, double vDPI,
contents.fetch(xref, &obj);
if (!obj.isNull()) {
if ( progressCbk ) {
+#if 1
// create dummy output device to count operations
OutputDev* dummyOutputDev = new DummyOutputDev();
dummyGfx = createGfx(dummyOutputDev, hDPI, vDPI, rotate, useMediaBox, crop,
sliceX, sliceY, sliceW, sliceH, printing, abortCheckCbk, abortCheckCbkData);
dummyGfx->display(&obj);
unsigned long totalOperations = dummyGfx->getOperationCount();
+#endif
// set progress callback on the real gfx object
- gfx->setProgressCallback( progressCbk, progressCbkData, num/*pageNum*/, totalOperations );
+ gfx->setProgressCallback( progressCbk, progressCbkData, num/*pageNum*/, 0);
}
gfx->saveState();
gfx->display(&obj);
diff --git a/poppler/Parser.h b/poppler/Parser.h
index 5ab4099..301676b 100644
--- a/poppler/Parser.h
+++ b/poppler/Parser.h
@@ -60,6 +60,9 @@ public:
// Get current position in file.
int getPos() { return lexer->getPos(); }
+ // Get cumulative relative position in the array of content streams
+ int getBaseStreamPos() { return lexer->getBaseStreamPos(); }
+
private:
XRef *xref; // the xref table for this PDF file
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler