Marc Dequènes (Duck) <d...@duckcorp.org> ha escrit: > I've already used this setting. It works well if you search across > *all* databases, but if you specify one, the search goes on and > displays the whole database content after several minutes hanging the > machine (please don't experiment with my server).
I see your point, but what you report is another bug, not related to that particular search strategy. It becomes prominent when "all" is used (because of a huge number of elements involved), but it affects other searches as well. Please apply the attached patch. It will fix the response procedure for all types of the queries (both "match" and "define") in dictorg databases. This will make single-database "all" matches feasible in terms of time usage. On the other hand, I agree that a mechanism for disabling arbitrary strategies is needed (both on database level and globally). I will provide a solution for this latter. Regards, Sergey
>From 9b10f09671e8498ee9862bd9190fc1fb324e35e7 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff <g...@gnu.org.ua> Date: Mon, 24 May 2010 14:26:00 +0300 Subject: [PATCH] Speed up output procedure in dictorg. Provide a general-purpose mechanism to address iterators by item number in O(|n-pos|) time. * include/dico/list.h (dico_iterator_prev) (dico_iterator_item, dico_iterator_position): New prototypes. * lib/list.c (list_entry) <prev>: New member. (iterator) <pos>: New member. (dico_iterator_position): New function. (_iterator_increase_pos): New static. (dico_iterator_first): Initialize pos to 0. (dico_iterator_next): Increase pos. (dico_iterator_prev,dico_iterator_item): New function. (_dico_list_append): Initialize ep->prev. (_dico_list_prepend): Initialize ep->prev. Call _iterator_increase_pos to tell iterators to update their recorded positions. (_dico_list_remove): Rewrite removal code using next & prev pointers. (_dico_list_insert_sorted): Update next & prev pointers. Call _iterator_increase_pos. * modules/dict.org/dictorg.h (result) <itr>: New member. * modules/dict.org/dictorg.c (common_match) (suffix_match, _match_all): Initialize itr. (mod_output_result): Use iterator to avoid rescanning the list on each call. (mod_free_result): Destroy the iterator. * lib/utf8.c (utf8_strcasecmp, utf8_strncasecmp): Break the loop if alen or blen is zero. This means that one of the operands is not utf8, but try to return meaningful value anyway. --- include/dico/list.h | 3 + lib/list.c | 126 +++++++++++++++++++++++++++++++++++--------- lib/utf8.c | 8 +++ modules/dict.org/dictorg.c | 14 +++++- modules/dict.org/dictorg.h | 1 + 5 files changed, 126 insertions(+), 26 deletions(-) diff --git a/include/dico/list.h b/include/dico/list.h index 8f5419d..4ca5d40 100644 --- a/include/dico/list.h +++ b/include/dico/list.h @@ -64,6 +64,9 @@ dico_iterator_t dico_list_iterator(dico_list_t list); void dico_iterator_destroy(dico_iterator_t *ip); void *dico_iterator_first(dico_iterator_t ip); void *dico_iterator_next(dico_iterator_t ip); +void *dico_iterator_prev(dico_iterator_t ip); +void *dico_iterator_item(dico_iterator_t ip, size_t n); +size_t dico_iterator_position(dico_iterator_t ip); int dico_iterator_remove_current(dico_iterator_t ip, void **pptr); void dico_iterator_set_data(dico_iterator_t ip, void *data); diff --git a/lib/list.c b/lib/list.c index 309369c..9d1da6b 100644 --- a/lib/list.c +++ b/lib/list.c @@ -23,7 +23,7 @@ #include <errno.h> struct list_entry { - struct list_entry *next; + struct list_entry *next, *prev; void *data; }; @@ -42,6 +42,7 @@ struct iterator { dico_list_t list; struct list_entry *cur; int advanced; + size_t pos; }; static int @@ -120,13 +121,22 @@ dico_iterator_current(dico_iterator_t ip) return ip->cur ? ip->cur->data : NULL; } +size_t +dico_iterator_position(dico_iterator_t ip) +{ + if (!ip) + return 0; + return ip->pos; +} + static void dico_iterator_attach(dico_iterator_t itr, dico_list_t list) { itr->list = list; - itr->cur = NULL; + itr->cur = list->head; itr->next = list->itr; itr->advanced = 0; + itr->pos = 0; list->itr = itr; } @@ -178,6 +188,26 @@ dico_iterator_destroy(dico_iterator_t *ip) *ip = NULL; } +static void +_iterator_increase_pos(dico_iterator_t ip, size_t after) +{ + for (; ip; ip = ip->next) { + if (ip->pos > after) + ip->pos++; + } +} + +static void +_iterator_advance(dico_iterator_t ip, struct list_entry *e) +{ + for (; ip; ip = ip->next) { + if (ip->cur == e) { + ip->cur = e->next; + ip->advanced++; + } + } +} + void * dico_iterator_first(dico_iterator_t ip) { @@ -185,6 +215,7 @@ dico_iterator_first(dico_iterator_t ip) return NULL; ip->cur = ip->list->head; ip->advanced = 0; + ip->pos = 0; return dico_iterator_current(ip); } @@ -193,12 +224,53 @@ dico_iterator_next(dico_iterator_t ip) { if (!ip || !ip->cur) return NULL; - if (!ip->advanced) + if (!ip->advanced) { ip->cur = ip->cur->next; + ip->pos++; + } ip->advanced = 0; return dico_iterator_current(ip); } +void * +dico_iterator_prev(dico_iterator_t ip) +{ + if (!ip || !ip->cur) + return NULL; + ip->cur = ip->cur->prev; + if (!ip->advanced) + ip->pos--; + ip->advanced = 0; + return dico_iterator_current(ip); +} + +void * +dico_iterator_item(dico_iterator_t ip, size_t n) +{ + if (n > ip->pos) { + if (!ip->advanced) { + ip->cur = ip->cur->next; + ip->pos++; + } + ip->advanced = 0; + + while (ip->cur && ip->pos < n) { + ip->cur = ip->cur->next; + ip->pos++; + } + } else if (n < ip->pos) { + if (!ip->advanced) + ip->pos--; + ip->advanced = 0; + + while (ip->cur && ip->pos > n) { + ip->cur = ip->cur->prev; + ip->pos--; + } + } + return dico_iterator_current(ip); +} + int dico_iterator_remove_current(dico_iterator_t ip, void **pptr) { @@ -211,17 +283,6 @@ dico_iterator_set_data(dico_iterator_t ip, void *data) ip->cur->data = data; } -static void -_iterator_advance(dico_iterator_t ip, struct list_entry *e) -{ - for (; ip; ip = ip->next) { - if (ip->cur == e) { - ip->cur = e->next; - ip->advanced++; - } - } -} - void * dico_list_item(struct dico_list *list, size_t n) { @@ -305,6 +366,7 @@ _dico_list_append(struct dico_list *list, void *data) if (!ep) return 1; ep->next = NULL; + ep->prev = list->tail; ep->data = data; if (list->tail) list->tail->next = ep; @@ -323,10 +385,12 @@ _dico_list_prepend(struct dico_list *list, void *data) return 1; ep->data = data; ep->next = list->head; + ep->prev = NULL; list->head = ep; if (!list->tail) list->tail = list->head; list->count++; + _iterator_increase_pos(list->itr, 0); return 0; } @@ -373,7 +437,7 @@ _dico_list_remove(struct dico_list *list, void *data, dico_list_comp_t cmp, if (!cmp) cmp = cmp_ptr; - for (p = list->head, prev = NULL; p; prev = p, p = p->next) + for (p = list->head; p; p = p->next) if (cmp(p->data, data) == 0) break; @@ -383,14 +447,16 @@ _dico_list_remove(struct dico_list *list, void *data, dico_list_comp_t cmp, } _iterator_advance(list->itr, p); - if (p == list->head) { - list->head = list->head->next; - if (!list->head) - list->tail = NULL; - } else - prev->next = p->next; - if (p == list->tail) + prev = p->prev; + if (prev) + prev->next = p->next; + else + list->head = list->head->next; + + if (p->next) + p->next->prev = prev; + else list->tail = prev; free(p); @@ -467,7 +533,8 @@ _dico_list_insert_sorted(struct dico_list *list, void *data, dico_list_comp_t cmp) { int rc; - struct list_entry *cur, *prev; + struct list_entry *cur; + size_t i; if (!list) { errno = EINVAL; @@ -477,21 +544,30 @@ _dico_list_insert_sorted(struct dico_list *list, void *data, if (!cmp) cmp = cmp_ptr; - for (cur = list->head, prev = NULL; cur; prev = cur, cur = cur->next) + for (cur = list->head, i = 0; cur; cur = cur->next, i++) if (cmp(cur->data, data) > 0) break; - if (!prev) { + if (!cur->prev) { rc = _dico_list_prepend(list, data); } else if (!cur) { rc = _dico_list_append(list, data); } else { struct list_entry *ep = malloc(sizeof(*ep)); if (ep) { + struct list_entry *prev = cur->prev; + rc = 0; ep->data = data; + ep->next = cur; + cur->prev = ep; + + ep->prev = prev; prev->next = ep; + + _iterator_increase_pos(list->itr, i - 1); + list->count++; } else rc = 1; diff --git a/lib/utf8.c b/lib/utf8.c index 3acf96a..8d2f479 100644 --- a/lib/utf8.c +++ b/lib/utf8.c @@ -1756,8 +1756,12 @@ utf8_strcasecmp(char *a, char *b) return 1; alen = utf8_char_width(a); + if (alen == 0) + return -1; utf8_mbtowc(&wa, a, alen); blen = utf8_char_width(b); + if (blen == 0) + return 1; utf8_mbtowc(&wb, b, blen); wa = utf8_wc_toupper(wa); wb = utf8_wc_toupper(wb); @@ -1788,8 +1792,12 @@ utf8_strncasecmp(char *a, char *b, size_t maxlen) return 1; alen = utf8_char_width(a); + if (alen == 0) + return -1; utf8_mbtowc(&wa, a, alen); blen = utf8_char_width(b); + if (blen == 0) + return 1; utf8_mbtowc(&wb, b, blen); wa = utf8_wc_toupper(wa); wb = utf8_wc_toupper(wb); diff --git a/modules/dict.org/dictorg.c b/modules/dict.org/dictorg.c index 4d638db..d92f861 100644 --- a/modules/dict.org/dictorg.c +++ b/modules/dict.org/dictorg.c @@ -579,6 +579,7 @@ common_match(struct dictdb *db, const char *word, memerr("common_match"); return 0; } + res->itr = NULL; if (unique) { dico_list_set_comparator(res->list, (int (*)(const void *, void *)) @@ -705,6 +706,7 @@ suffix_match(struct dictdb *db, const char *word, struct result *res) free(tmp); res->type = result_match; res->list = list; + res->itr = NULL; res->compare_count = compare_count; rc = 0; } else @@ -842,6 +844,7 @@ _match_all(struct dictdb *db, const char *word, res->db = db; res->type = result_match; res->list = list; + res->itr = NULL; res->compare_count = compare_count; return (dico_result_t) res; } @@ -914,7 +917,15 @@ static int mod_output_result (dico_result_t rp, size_t n, dico_stream_t str) { struct result *res = (struct result *) rp; - const struct index_entry *ep = dico_list_item(res->list, n); + const struct index_entry *ep; + + if (!res->itr) { + res->itr = dico_list_iterator(res->list); + if (!res->itr) + return 1; + } + ep = dico_iterator_item(res->itr, n); + switch (res->type) { case result_match: @@ -945,6 +956,7 @@ static void mod_free_result(dico_result_t rp) { struct result *res = (struct result *) rp; + dico_iterator_destroy(&res->itr); dico_list_destroy(&res->list); free(rp); } diff --git a/modules/dict.org/dictorg.h b/modules/dict.org/dictorg.h index 6f145f4..8a366dd 100644 --- a/modules/dict.org/dictorg.h +++ b/modules/dict.org/dictorg.h @@ -140,6 +140,7 @@ struct result { enum result_type type; size_t compare_count; dico_list_t list; + dico_iterator_t itr; }; typedef int (*entry_match_t) (struct dictdb *, -- 1.6.0.3