Justus Winter, le Tue 13 May 2014 21:02:52 +0200, a écrit : > libihash uses open addressing. Previously, quadratic probing in both > directions was used to resolve collisions. Quadratic probing might > result in a less efficient use of caches. > > Also, prime numbers of the form 4 * i + 3 were used as array sizes. > This was used in combination with the integer modulo operation for > hashing. It has been known for some time that libihash suffers from > collisions, so a integer hash function is now applied to the keys to > derive the index. > > Use linear probing instead. Also, use powers of two for the array > sizes, so a bit mask can be used for the modulo operation.
Ack. > * libihash/ihash.c (ihash_sizes, ihash_nsizes): Remove. > (find_index): Use linear probing and fast modulo operation. > (add_one): Likewise. > * libihash/ihash.h (HURD_IHASH_MIN_SIZE): New macro. > --- > libihash/ihash.c | 125 > +++++++------------------------------------------------ > libihash/ihash.h | 4 ++ > 2 files changed, 19 insertions(+), 110 deletions(-) > > diff --git a/libihash/ihash.c b/libihash/ihash.c > index 71ddc26..d628d75 100644 > --- a/libihash/ihash.c > +++ b/libihash/ihash.c > @@ -31,55 +31,6 @@ > #include <assert.h> > > #include "ihash.h" > - > - > -/* The prime numbers of the form 4 * i + 3 for some i, all greater > - than twice the previous one and smaller than 2^40 (for now). */ > -static const uint64_t ihash_sizes[] = > -{ > - 3, > - 7, > - 19, > - 43, > - 103, > - 211, > - 431, > - 863, > - 1747, > - 3499, > - 7019, > - 14051, > - 28111, > - 56239, > - 112507, > - 225023, > - 450067, > - 900139, > - 1800311, > - 3600659, > - 7201351, > - 14402743, > - 28805519, > - 57611039, > - 115222091, > - 230444239, > - 460888499, > - 921777067, > - 1843554151, > - UINT64_C (3687108307), > - UINT64_C (7374216631), > - UINT64_C (14748433279), > - UINT64_C (29496866579), > - UINT64_C (58993733159), > - UINT64_C (117987466379), > - UINT64_C (235974932759), > - UINT64_C (471949865531), > - UINT64_C (943899731087) > -}; > - > -static const unsigned int ihash_nsizes = (sizeof ihash_sizes > - / sizeof ihash_sizes[0]); > - > > /* This is the integer finalizer from MurmurHash3. */ > static inline uint32_t > @@ -120,40 +71,24 @@ static inline int > find_index (hurd_ihash_t ht, hurd_ihash_key_t key) > { > unsigned int idx; > - unsigned int i; > unsigned int up_idx; > - unsigned int down_idx; > + unsigned int mask = ht->size - 1; > > - idx = murmur3_mix32 (key, 32) % ht->size; > + idx = murmur3_mix32 (key, __builtin_ctz (ht->size)); > > if (ht->items[idx].value == _HURD_IHASH_EMPTY || ht->items[idx].key == key) > return idx; > > - /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + i^2, > - we add 1, 3, 5, 7, etc to the previous index. We do this in both > - directions separately. */ > - i = 1; > up_idx = idx; > - down_idx = idx; > > do > { > - up_idx = (up_idx + i) % ht->size; > + up_idx = (up_idx + 1) & mask; > if (ht->items[up_idx].value == _HURD_IHASH_EMPTY > || ht->items[up_idx].key == key) > return up_idx; > - > - if (down_idx < i) > - down_idx += ht->size; > - down_idx = (down_idx - i) % ht->size; > - if (ht->items[down_idx].value == _HURD_IHASH_EMPTY > - || ht->items[down_idx].key == key) > - return down_idx; > - > - /* After (ht->size - 1) / 2 iterations, this will be 0. */ > - i = (i + 2) % ht->size; > } > - while (i); > + while (up_idx != idx); > > /* If we end up here, the item could not be found. Return any > invalid index. */ > @@ -277,52 +212,25 @@ add_one (hurd_ihash_t ht, hurd_ihash_key_t key, > hurd_ihash_value_t value) > unsigned int idx; > unsigned int first_free; > > - idx = murmur3_mix32 (key, 32) % ht->size; > + idx = murmur3_mix32 (key, __builtin_ctz (ht->size)); > first_free = idx; > > if (ht->items[idx].value != _HURD_IHASH_EMPTY && ht->items[idx].key != key) > { > - /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + > - i^2, we add 1, 3, 5, 7, ... 2 * i - 1 to the previous index. > - We do this in both directions separately. */ > - unsigned int i = 1; > + unsigned int mask = ht->size - 1; > unsigned int up_idx = idx; > - unsigned int down_idx = idx; > - > + > do > { > - up_idx = (up_idx + i) % ht->size; > + up_idx = (up_idx + 1) & mask; > if (ht->items[up_idx].value == _HURD_IHASH_EMPTY > || ht->items[up_idx].key == key) > { > idx = up_idx; > break; > } > - if (first_free == idx > - && ht->items[up_idx].value == _HURD_IHASH_DELETED) > - first_free = up_idx; > - > - if (down_idx < i) > - down_idx += ht->size; > - down_idx = (down_idx - i) % ht->size; > - if (down_idx < 0) > - down_idx += ht->size; > - else > - down_idx %= ht->size; > - if (ht->items[down_idx].value == _HURD_IHASH_EMPTY > - || ht->items[down_idx].key == key) > - { > - idx = down_idx; > - break; > - } > - if (first_free == idx > - && ht->items[down_idx].value == _HURD_IHASH_DELETED) > - first_free = down_idx; > - > - /* After (ht->size - 1) / 2 iterations, this will be 0. */ > - i = (i + 2) % ht->size; > } > - while (i); > + while (up_idx != idx); > } > > /* Remove the old entry for this key if necessary. */ > @@ -365,21 +273,18 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, > hurd_ihash_value_t item) > if (ht->size) > { > /* Only fill the hash table up to its maximum load factor. */ > - if (ht->nr_items * 100 / ht->size <= ht->max_load) > + if ((ht->nr_items * 100) >> __builtin_ctz (ht->size) <= ht->max_load) > if (add_one (ht, key, item)) > return 0; > } > > /* The hash table is too small, and we have to increase it. */ > - for (i = 0; i < ihash_nsizes; i++) > - if (ihash_sizes[i] > old_ht.size) > - break; > - if (i == ihash_nsizes > - || ihash_sizes[i] > SIZE_MAX / sizeof (struct _hurd_ihash_item)) > - return ENOMEM; /* Surely will be true momentarily. */ > - > ht->nr_items = 0; > - ht->size = ihash_sizes[i]; > + if (ht->size == 0) > + ht->size = HURD_IHASH_MIN_SIZE; > + else > + ht->size <<= 1; > + > /* calloc() will initialize all values to _HURD_IHASH_EMPTY implicitely. > */ > ht->items = calloc (ht->size, sizeof (struct _hurd_ihash_item)); > > diff --git a/libihash/ihash.h b/libihash/ihash.h > index a24f384..8829e51 100644 > --- a/libihash/ihash.h > +++ b/libihash/ihash.h > @@ -93,6 +93,10 @@ typedef struct hurd_ihash *hurd_ihash_t; > > /* Construction and destruction of hash tables. */ > > +/* The size of the initial allocation in number of items. This must > + be a power of two. */ > +#define HURD_IHASH_MIN_SIZE 32 > + > /* The default value for the maximum load factor in percent. */ > #define HURD_IHASH_MAX_LOAD_DEFAULT 75 > > -- > 2.0.0.rc0 > -- Samuel <b> il faut combien de chevaux pour tirer une doloréan à 88 morph ? ***b vient de remarque que 88 mph c'est 142 km/h <y> aaaaah <y> c'est pour ça qu'ils limitent à 130 km/h sur les autoroutes <y> c'est pour éviter que les gens voyagent dans le temps <b> probablement