Hello again again, Am 08.05.2018 um 07:37 schrieb Adam Reichold: > Hello again, > > Am 08.05.2018 um 00:12 schrieb Albert Astals Cid: >> El dilluns, 7 de maig de 2018, a les 23:03:00 CEST, Adam Reichold va >> escriure: >>> Hello, >>> >>> it is probably already too much purely technical code churn for this >>> release cycle, but the use of memmove got me thinking how an >>> implementation of Dict with more STL usage would look like. >>> >>> The result is attached, requesting comments on whether this is >>> considered useful or not. >> >> The remove implementation is wrong since it breaks sorting. > > Could you explain in more detail? My thinking was that any sublist of a > sorted list is also sorted and hence removing an element of a list > cannot break its sorting? (And since we do not increase the size by > removing an element, we cannot reach SORT_LENGTH_LOWER_LIMIT if we were > not there already.)
Sorry, that took a minute... Because of the swap... But actually, there already is std::vector::erase which does all the tricks like using memmove if it is statically safe, so it should actually be simpler. Fixed version attached... > Best regards, > Adam > >> Cheers, >> Albert >> >>> >>> Best regards, >>> Adam >>> >>> Am 07.05.2018 um 19:14 schrieb Albert Astals Cid: >>>> poppler/Array.cc | 2 +- >>>> poppler/Dict.cc | 2 +- >>>> 2 files changed, 2 insertions(+), 2 deletions(-) >>>> >>>> New commits: >>>> commit 07b8f838b3d8859a3ad34a3140bb24475bd6ac2c >>>> Author: Albert Astals Cid <[email protected]> >>>> Date: Mon May 7 19:13:07 2018 +0200 >>>> >>>> cast to void * to bypass new gcc -Wclass-memaccess warning >>>> >>>> Yes what we're doing there is a bit nasty but it works :D >>>> >>>> diff --git a/poppler/Array.cc b/poppler/Array.cc >>>> index 3276349f..e8aa34ea 100644 >>>> --- a/poppler/Array.cc >>>> +++ b/poppler/Array.cc >>>> @@ -112,7 +112,7 @@ void Array::remove(int i) { >>>> >>>> #endif >>>> >>>> } >>>> --length; >>>> >>>> - memmove( elems + i, elems + i + 1, sizeof(elems[0]) * (length - i) ); >>>> + memmove( static_cast<void*>(elems + i), elems + i + 1, sizeof(elems[0]) >>>> * (length - i) );> >>>> } >>>> >>>> Object Array::get(int i, int recursion) const { >>>> >>>> diff --git a/poppler/Dict.cc b/poppler/Dict.cc >>>> index a431f7eb..bc86fd77 100644 >>>> --- a/poppler/Dict.cc >>>> +++ b/poppler/Dict.cc >>>> @@ -201,7 +201,7 @@ void Dict::remove(const char *key) { >>>> >>>> gfree(entries[pos].key); >>>> entries[pos].val.free(); >>>> if (pos != length) { >>>> >>>> - memmove(&entries[pos], &entries[pos + 1], (length - pos) * >>>> sizeof(DictEntry)); + memmove(static_cast<void*>(&entries[pos]), >>>> &entries[pos + 1], (length - pos) * sizeof(DictEntry));> >>>> } >>>> >>>> } >>>> >>>> } else { >>>> >>>> _______________________________________________ >>>> poppler mailing list >>>> [email protected] >>>> https://lists.freedesktop.org/mailman/listinfo/poppler >> >> >> >> >> _______________________________________________ >> poppler mailing list >> [email protected] >> https://lists.freedesktop.org/mailman/listinfo/poppler >> > > > > _______________________________________________ > poppler mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/poppler >
From 97e34954ca028b8530518c5488b29c5f4af50fc8 Mon Sep 17 00:00:00 2001 From: Adam Reichold <[email protected]> Date: Mon, 7 May 2018 22:58:43 +0200 Subject: [PATCH] Try to simplify Dict by implementing it in terms of std::vector and std::string. --- poppler/Catalog.cc | 2 +- poppler/Catalog.h | 2 +- poppler/Dict.cc | 235 ++++++++++++++++------------------------------------- poppler/Dict.h | 46 ++++++----- poppler/Form.cc | 2 +- poppler/Object.h | 4 +- 6 files changed, 98 insertions(+), 193 deletions(-) diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc index d259bda6..eba626e6 100644 --- a/poppler/Catalog.cc +++ b/poppler/Catalog.cc @@ -428,7 +428,7 @@ int Catalog::numDests() return obj->dictGetLength(); } -char *Catalog::getDestsName(int i) +const char *Catalog::getDestsName(int i) { Object *obj; diff --git a/poppler/Catalog.h b/poppler/Catalog.h index 0467159a..2c18b4a2 100644 --- a/poppler/Catalog.h +++ b/poppler/Catalog.h @@ -163,7 +163,7 @@ public: int numDests(); // Get the i'th named destination name in name-dict - char *getDestsName(int i); + const char *getDestsName(int i); // Get the i'th named destination link destination in name-dict LinkDest *getDestsDest(int i); diff --git a/poppler/Dict.cc b/poppler/Dict.cc index bc86fd77..618799e5 100644 --- a/poppler/Dict.cc +++ b/poppler/Dict.cc @@ -51,247 +51,150 @@ // Dict //------------------------------------------------------------------------ -static const int SORT_LENGTH_LOWER_LIMIT = 32; +constexpr int SORT_LENGTH_LOWER_LIMIT = 32; -static inline bool cmpDictEntries(const DictEntry &e1, const DictEntry &e2) -{ - return strcmp(e1.key, e2.key) < 0; -} - -static int binarySearch(const char *key, DictEntry *entries, int length) -{ - int first = 0; - int end = length - 1; - while (first <= end) { - const int middle = (first + end) / 2; - const int res = strcmp(key, entries[middle].key); - if (res == 0) { - return middle; - } else if (res < 0) { - end = middle - 1; - } else { - first = middle + 1; - } +struct Dict::CmpDictEntry { + bool operator()(const DictEntry &lhs, const DictEntry &rhs) const { + return lhs.first < rhs.first; } - return -1; -} + bool operator()(const DictEntry &lhs, const char *rhs) const { + return lhs.first < rhs; + } + bool operator()(const char *lhs, const DictEntry &rhs) const { + return lhs < rhs.first; + } +}; Dict::Dict(XRef *xrefA) { xref = xrefA; - entries = nullptr; - size = length = 0; ref = 1; - sorted = gFalse; #ifdef MULTITHREADED gInitMutex(&mutex); #endif + + sorted = false; } -Dict::Dict(Dict* dictA) { +Dict::Dict(const Dict* dictA) { xref = dictA->xref; - size = length = dictA->length; ref = 1; #ifdef MULTITHREADED gInitMutex(&mutex); #endif - sorted = dictA->sorted; - entries = (DictEntry *)gmallocn(size, sizeof(DictEntry)); - for (int i=0; i<length; i++) { - entries[i].key = copyString(dictA->entries[i].key); - entries[i].val.initNullAfterMalloc(); - entries[i].val = dictA->entries[i].val.copy(); + entries.reserve(dictA->entries.size()); + for (const auto& entry : dictA->entries) { + entries.emplace_back(entry.first, entry.second.copy()); } + + sorted = dictA->sorted; } -Dict *Dict::copy(XRef *xrefA) { +Dict *Dict::copy(XRef *xrefA) const { dictLocker(); Dict *dictA = new Dict(this); dictA->xref = xrefA; - for (int i=0; i<length; i++) { - if (dictA->entries[i].val.getType() == objDict) { - Dict *copy = dictA->entries[i].val.getDict()->copy(xrefA); - dictA->entries[i].val = Object(copy); + for (auto &entry : dictA->entries) { + if (entry.second.getType() == objDict) { + entry.second = Object(entry.second.getDict()->copy(xrefA)); } } return dictA; } Dict::~Dict() { - int i; - - for (i = 0; i < length; ++i) { - gfree(entries[i].key); - entries[i].val.free(); - } - gfree(entries); #ifdef MULTITHREADED gDestroyMutex(&mutex); #endif } -int Dict::incRef() { - dictLocker(); - ++ref; - return ref; -} - -int Dict::decRef() { - dictLocker(); - --ref; - return ref; -} - -void Dict::add(char *key, Object &&val) { +void Dict::add(const char *key, Object &&val) { dictLocker(); - if (sorted) { - // We use add on very few occasions so - // virtually this will never be hit - sorted = gFalse; - } - - if (length == size) { - if (length == 0) { - size = 8; - } else { - size *= 2; + if (entries.size() >= SORT_LENGTH_LOWER_LIMIT) { + if (!sorted) { + std::sort(entries.begin(), entries.end(), CmpDictEntry{}); + sorted = true; } - entries = (DictEntry *)greallocn(entries, size, sizeof(DictEntry)); + const auto pos = std::upper_bound(entries.begin(), entries.end(), key, CmpDictEntry{}); + entries.emplace(pos, key, std::move(val)); + } else { + entries.emplace_back(key, std::move(val)); + sorted = false; } - entries[length].key = key; - entries[length].val.initNullAfterMalloc(); - entries[length].val = std::move(val); - ++length; } -inline DictEntry *Dict::find(const char *key) const { - if (!sorted && length >= SORT_LENGTH_LOWER_LIMIT) - { - dictLocker(); - sorted = gTrue; - std::sort(entries, entries+length, cmpDictEntries); - } - +inline const Dict::DictEntry *Dict::find(const char *key) const { if (sorted) { - const int pos = binarySearch(key, entries, length); - if (pos != -1) { - return &entries[pos]; + const auto pos = std::lower_bound(entries.begin(), entries.end(), key, CmpDictEntry{}); + if (pos != entries.end() && pos->first == key) { + return &*pos; } } else { - int i; - - for (i = length - 1; i >=0; --i) { - if (!strcmp(key, entries[i].key)) - return &entries[i]; + const auto pos = std::find_if(entries.rbegin(), entries.rend(), [key](const DictEntry& entry) { + return entry.first == key; + }); + if (pos != entries.rend()) { + return &*pos; } } return nullptr; } -GBool Dict::hasKey(const char *key) const { - return find(key) != nullptr; +inline Dict::DictEntry *Dict::find(const char *key) { + return const_cast<DictEntry *>(const_cast<const Dict *>(this)->find(key)); } void Dict::remove(const char *key) { dictLocker(); - if (sorted) { - const int pos = binarySearch(key, entries, length); - if (pos != -1) { - length -= 1; - gfree(entries[pos].key); - entries[pos].val.free(); - if (pos != length) { - memmove(static_cast<void*>(&entries[pos]), &entries[pos + 1], (length - pos) * sizeof(DictEntry)); - } - } - } else { - int i; - bool found = false; - if(length == 0) { - return; - } - - for(i=0; i<length; i++) { - if (!strcmp(key, entries[i].key)) { - found = true; - break; - } - } - if(!found) { - return; - } - //replace the deleted entry with the last entry - gfree(entries[i].key); - entries[i].val.free(); - length -= 1; - if (i!=length) { - //don't copy the last entry if it is deleted - entries[i].key = entries[length].key; - entries[i].val = std::move(entries[length].val); - } + if (const auto *entry = find(key)) { + entries.erase(std::vector<DictEntry>::const_iterator{entry}); } } void Dict::set(const char *key, Object &&val) { - DictEntry *e; if (val.isNull()) { remove(key); return; } - e = find (key); - if (e) { - dictLocker(); - e->val = std::move(val); + dictLocker(); + if (auto *entry = find(key)) { + entry->second = std::move(val); } else { - add (copyString(key), std::move(val)); + add(key, std::move(val)); } } GBool Dict::is(const char *type) const { - DictEntry *e; - - return (e = find("Type")) && e->val.isName(type); + if (const auto *entry = find("Type")) { + return entry->second.isName(type); + } + return gFalse; } Object Dict::lookup(const char *key, int recursion) const { - DictEntry *e; - - return (e = find(key)) ? e->val.fetch(xref, recursion) : Object(objNull); + if (const auto *entry = find(key)) { + return entry->second.fetch(xref, recursion); + } + return Object(objNull); } Object Dict::lookupNF(const char *key) const { - DictEntry *e; - - return (e = find(key)) ? e->val.copy() : Object(objNull); + if (const auto *entry = find(key)) { + return entry->second.copy(); + } + return Object(objNull); } GBool Dict::lookupInt(const char *key, const char *alt_key, int *value) const { - GBool success = gFalse; - Object obj1 = lookup ((char *) key); - if (obj1.isNull () && alt_key != nullptr) { - obj1.free (); - obj1 = lookup ((char *) alt_key); + Object obj1 = lookup(key); + if (obj1.isNull() && alt_key != nullptr) { + obj1 = lookup(alt_key); } - if (obj1.isInt ()) { - *value = obj1.getInt (); - success = gTrue; + if (obj1.isInt()) { + *value = obj1.getInt(); + return gTrue; } - - obj1.free (); - - return success; -} - -char *Dict::getKey(int i) const { - return entries[i].key; -} - -Object Dict::getVal(int i) const { - return entries[i].val.fetch(xref); -} - -Object Dict::getValNF(int i) const { - return entries[i].val.copy(); + return gFalse; } diff --git a/poppler/Dict.h b/poppler/Dict.h index 9fd732a1..276d8c58 100644 --- a/poppler/Dict.h +++ b/poppler/Dict.h @@ -33,6 +33,11 @@ #pragma interface #endif +#include <atomic> +#include <string> +#include <vector> +#include <utility> + #include "poppler-config.h" #include "Object.h" #include "goo/GooMutex.h" @@ -41,18 +46,13 @@ // Dict //------------------------------------------------------------------------ -struct DictEntry { - char *key; - Object val; -}; - class Dict { public: // Constructor. Dict(XRef *xrefA); - Dict(Dict* dictA); - Dict *copy(XRef *xrefA); + Dict(const Dict *dictA); + Dict *copy(XRef *xrefA) const; // Destructor. ~Dict(); @@ -61,11 +61,11 @@ public: Dict& operator=(const Dict &) = delete; // Get number of entries. - int getLength() const { return length; } + int getLength() const { return static_cast<int>(entries.size()); } - // Add an entry. NB: does not copy key. + // Add an entry. // val becomes a dead object after the call - void add(char *key, Object &&val); + void add(const char *key, Object &&val); // Update the value of an existing entry, otherwise create it // val becomes a dead object after the call @@ -83,9 +83,9 @@ public: GBool lookupInt(const char *key, const char *alt_key, int *value) const; // Iterative accessors. - char *getKey(int i) const; - Object getVal(int i) const; - Object getValNF(int i) const; + const char *getKey(int i) const { return entries[i].first.c_str(); } + Object getVal(int i) const { return entries[i].second.fetch(xref); } + Object getValNF(int i) const { return entries[i].second.copy(); } // Set the xref pointer. This is only used in one special case: the // trailer dictionary, which is read before the xref table is @@ -94,26 +94,28 @@ public: XRef *getXRef() const { return xref; } - GBool hasKey(const char *key) const; + GBool hasKey(const char *key) const { return find(key) != nullptr; } private: friend class Object; // for incRef/decRef // Reference counting. - int incRef(); - int decRef(); + int incRef() { return ++ref; } + int decRef() { return --ref; } + + using DictEntry = std::pair<std::string, Object>; + struct CmpDictEntry; - mutable GBool sorted; + bool sorted; XRef *xref; // the xref table for this PDF file - DictEntry *entries; // array of entries - int size; // size of <entries> array - int length; // number of entries in dictionary - int ref; // reference count + std::vector<DictEntry> entries; + std::atomic_int ref; // reference count #ifdef MULTITHREADED mutable GooMutex mutex; #endif - DictEntry *find(const char *key) const; + const DictEntry *find(const char *key) const; + DictEntry *find(const char *key); }; #endif diff --git a/poppler/Form.cc b/poppler/Form.cc index 820944e8..a4aab1a5 100644 --- a/poppler/Form.cc +++ b/poppler/Form.cc @@ -193,7 +193,7 @@ FormWidgetButton::FormWidgetButton (PDFDoc *docA, Object *aobj, unsigned num, Re Object obj2 = obj1.dictLookup("N"); if (obj2.isDict()) { for (int i = 0; i < obj2.dictGetLength(); i++) { - char *key = obj2.dictGetKey(i); + const char *key = obj2.dictGetKey(i); if (strcmp (key, "Off") != 0) { onStr = new GooString (key); break; diff --git a/poppler/Object.h b/poppler/Object.h index 97978f46..bcfd8ba9 100644 --- a/poppler/Object.h +++ b/poppler/Object.h @@ -269,7 +269,7 @@ public: GBool dictIs(const char *dictType) const; Object dictLookup(const char *key, int recursion = 0) const; Object dictLookupNF(const char *key) const; - char *dictGetKey(int i) const; + const char *dictGetKey(int i) const; Object dictGetVal(int i) const; Object dictGetValNF(int i) const; @@ -374,7 +374,7 @@ inline Object Object::dictLookup(const char *key, int recursion) const inline Object Object::dictLookupNF(const char *key) const { OBJECT_TYPE_CHECK(objDict); return dict->lookupNF(key); } -inline char *Object::dictGetKey(int i) const +inline const char *Object::dictGetKey(int i) const { OBJECT_TYPE_CHECK(objDict); return dict->getKey(i); } inline Object Object::dictGetVal(int i) const -- 2.16.3
signature.asc
Description: OpenPGP digital signature
_______________________________________________ poppler mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/poppler
