A Dijous, 26 d'agost de 2010, Paweł Wiejacha va escriure:
> On 26.08.2010 23:36, Albert Astals Cid wrote:
> > Do you really have a PDF with a Dictionary of 2000 entries?
> 
> Sure. http://students.mimuw.edu.pl/~pw248348/opos/perf_tests.tar.gz
> perf_tests/pdfs/104418018297-AttenInSuspensionsIrregularlyShapedSedimentPar
> ticles.pdf page 6

Attached my suggestion as patch for a Dict sort for "big" Dicts (>= 32 
entries) (tests show little or no speed increase on small dicts).

Comments?

Albert

> 
> regards,
> Paweł Wiejacha.
> _______________________________________________
> poppler mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/poppler
diff --git a/poppler/Dict.cc b/poppler/Dict.cc
index a899bca..8174c40 100644
--- a/poppler/Dict.cc
+++ b/poppler/Dict.cc
@@ -29,6 +29,7 @@
 #pragma implementation
 #endif
 
+#include <algorithm>
 #include <stddef.h>
 #include <string.h>
 #include "goo/gmem.h"
@@ -40,11 +41,37 @@
 // Dict
 //------------------------------------------------------------------------
 
+static const int SORT_LENGTH_LOWER_LIMIT = 32;
+
+static bool cmpDictEntries(const DictEntry &e1, const DictEntry &e2)
+{
+  return strcmp(e1.key, e2.key) < 0;
+}
+
+static int binarySearch(const char *key, DictEntry *entries, int length)
+{
+  int first = 0;
+  int end = length - 1;
+  while (first <= end) {
+    const int middle = (first + end) / 2;
+    const int res = strcmp(key, entries[middle].key);
+    if (res == 0) {
+      return middle;
+    } else if (res < 0) {
+      end = middle - 1;
+    } else {
+      first = middle + 1;
+    }
+  }
+  return -1;
+}
+
 Dict::Dict(XRef *xrefA) {
   xref = xrefA;
   entries = NULL;
   size = length = 0;
   ref = 1;
+  sorted = gFalse;
 }
 
 Dict::Dict(Dict* dictA) {
@@ -52,6 +79,7 @@ Dict::Dict(Dict* dictA) {
   size = length = dictA->length;
   ref = 1;
 
+  sorted = dictA->sorted;
   entries = (DictEntry *)gmallocn(size, sizeof(DictEntry));
   for (int i=0; i<length; i++) {
     entries[i].key = strdup(dictA->entries[i].key);
@@ -70,6 +98,12 @@ Dict::~Dict() {
 }
 
 void Dict::add(char *key, Object *val) {
+  if (sorted) {
+    // We use add on very few occasions so
+    // virtually this will never be hit
+    sorted = gFalse;
+  }
+
   if (length == size) {
     if (length == 0) {
       size = 8;
@@ -84,16 +118,38 @@ void Dict::add(char *key, Object *val) {
 }
 
 inline DictEntry *Dict::find(char *key) {
+  if (!sorted && length >= SORT_LENGTH_LOWER_LIMIT)
+  {
+      sorted = true;
+      std::sort(entries, entries+length, cmpDictEntries);
+  }
+
+  if (sorted) {
+    const int pos = binarySearch(key, entries, length);
+    if (pos != -1) {
+      return &entries[pos];
+    }
+  } else {
   int i;
 
   for (i = length - 1; i >=0; --i) {
     if (!strcmp(key, entries[i].key))
       return &entries[i];
   }
+  }
   return NULL;
 }
 
 void Dict::remove(char *key) {
+  if (sorted) {
+    const int pos = binarySearch(key, entries, length);
+    if (pos != -1) {
+      length -= 1;
+      if (pos != length) {
+        memmove(&entries[pos], &entries[pos + 1], (length - pos) * sizeof(DictEntry));
+      }
+    }
+  } else {
   int i; 
   bool found = false;
   DictEntry tmp;
@@ -112,6 +168,7 @@ void Dict::remove(char *key) {
   if (i!=length) //don't copy the last entry if it is deleted 
     entries[i] = tmp;
 }
+}
 
 void Dict::set(char *key, Object *val) {
   DictEntry *e;
diff --git a/poppler/Dict.h b/poppler/Dict.h
index a76bc89..dbb3893 100644
--- a/poppler/Dict.h
+++ b/poppler/Dict.h
@@ -89,6 +89,7 @@ public:
 
 private:
 
+  GBool sorted;
   XRef *xref;			// the xref table for this PDF file
   DictEntry *entries;		// array of entries
   int size;			// size of <entries> array
_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to