File: | src/dict.c |
Warning: | line 672, column 27 Access to field 'next' results in a dereference of a null pointer (loaded from variable 'he') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* Hash Tables Implementation. | |||
2 | * | |||
3 | * This file implements in memory hash tables with insert/del/replace/find/ | |||
4 | * get-random-element operations. Hash tables will auto resize if needed | |||
5 | * tables of power of two in size are used, collisions are handled by | |||
6 | * chaining. See the source code for more information... :) | |||
7 | * | |||
8 | * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com> | |||
9 | * All rights reserved. | |||
10 | * | |||
11 | * Redistribution and use in source and binary forms, with or without | |||
12 | * modification, are permitted provided that the following conditions are met: | |||
13 | * | |||
14 | * * Redistributions of source code must retain the above copyright notice, | |||
15 | * this list of conditions and the following disclaimer. | |||
16 | * * Redistributions in binary form must reproduce the above copyright | |||
17 | * notice, this list of conditions and the following disclaimer in the | |||
18 | * documentation and/or other materials provided with the distribution. | |||
19 | * * Neither the name of Redis nor the names of its contributors may be used | |||
20 | * to endorse or promote products derived from this software without | |||
21 | * specific prior written permission. | |||
22 | * | |||
23 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
24 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
27 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
28 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
29 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
30 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
31 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
32 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | |||
34 | */ | |||
35 | ||||
36 | #include "fmacros.h" | |||
37 | ||||
38 | #include <stdio.h> | |||
39 | #include <stdlib.h> | |||
40 | #include <stdint.h> | |||
41 | #include <string.h> | |||
42 | #include <stdarg.h> | |||
43 | #include <limits.h> | |||
44 | #include <sys/time.h> | |||
45 | ||||
46 | #include "dict.h" | |||
47 | #include "zmalloc.h" | |||
48 | #ifndef DICT_BENCHMARK_MAIN | |||
49 | #include "redisassert.h" | |||
50 | #else | |||
51 | #include <assert.h> | |||
52 | #endif | |||
53 | ||||
54 | /* Using dictEnableResize() / dictDisableResize() we make possible to | |||
55 | * enable/disable resizing of the hash table as needed. This is very important | |||
56 | * for Redis, as we use copy-on-write and don't want to move too much memory | |||
57 | * around when there is a child performing saving operations. | |||
58 | * | |||
59 | * Note that even when dict_can_resize is set to 0, not all resizes are | |||
60 | * prevented: a hash table is still allowed to grow if the ratio between | |||
61 | * the number of elements and the buckets > dict_force_resize_ratio. */ | |||
62 | static int dict_can_resize = 1; | |||
63 | static unsigned int dict_force_resize_ratio = 5; | |||
64 | ||||
65 | /* -------------------------- private prototypes ---------------------------- */ | |||
66 | ||||
67 | static int _dictExpandIfNeeded(dict *ht); | |||
68 | static unsigned long _dictNextPower(unsigned long size); | |||
69 | static long _dictKeyIndex(dict *ht, const void *key, uint64_t hash, dictEntry **existing); | |||
70 | static int _dictInit(dict *ht, dictType *type, void *privDataPtr); | |||
71 | ||||
72 | /* -------------------------- hash functions -------------------------------- */ | |||
73 | ||||
74 | static uint8_t dict_hash_function_seed[16]; | |||
75 | ||||
76 | void dictSetHashFunctionSeed(uint8_t *seed) { | |||
77 | memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed)); | |||
78 | } | |||
79 | ||||
80 | uint8_t *dictGetHashFunctionSeed(void) { | |||
81 | return dict_hash_function_seed; | |||
82 | } | |||
83 | ||||
84 | /* The default hashing function uses SipHash implementation | |||
85 | * in siphash.c. */ | |||
86 | ||||
87 | uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k); | |||
88 | uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k); | |||
89 | ||||
90 | uint64_t dictGenHashFunction(const void *key, int len) { | |||
91 | return siphash(key,len,dict_hash_function_seed); | |||
92 | } | |||
93 | ||||
94 | uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) { | |||
95 | return siphash_nocase(buf,len,dict_hash_function_seed); | |||
96 | } | |||
97 | ||||
98 | /* ----------------------------- API implementation ------------------------- */ | |||
99 | ||||
100 | /* Reset a hash table already initialized with ht_init(). | |||
101 | * NOTE: This function should only be called by ht_destroy(). */ | |||
102 | static void _dictReset(dictht *ht) | |||
103 | { | |||
104 | ht->table = NULL((void*)0); | |||
105 | ht->size = 0; | |||
106 | ht->sizemask = 0; | |||
107 | ht->used = 0; | |||
108 | } | |||
109 | ||||
110 | /* Create a new hash table */ | |||
111 | dict *dictCreate(dictType *type, | |||
112 | void *privDataPtr) | |||
113 | { | |||
114 | dict *d = zmalloc(sizeof(*d)); | |||
115 | ||||
116 | _dictInit(d,type,privDataPtr); | |||
117 | return d; | |||
118 | } | |||
119 | ||||
120 | /* Initialize the hash table */ | |||
121 | int _dictInit(dict *d, dictType *type, | |||
122 | void *privDataPtr) | |||
123 | { | |||
124 | _dictReset(&d->ht[0]); | |||
125 | _dictReset(&d->ht[1]); | |||
126 | d->type = type; | |||
127 | d->privdata = privDataPtr; | |||
128 | d->rehashidx = -1; | |||
129 | d->pauserehash = 0; | |||
130 | return DICT_OK0; | |||
131 | } | |||
132 | ||||
133 | /* Resize the table to the minimal size that contains all the elements, | |||
134 | * but with the invariant of a USED/BUCKETS ratio near to <= 1 */ | |||
135 | int dictResize(dict *d) | |||
136 | { | |||
137 | unsigned long minimal; | |||
138 | ||||
139 | if (!dict_can_resize || dictIsRehashing(d)((d)->rehashidx != -1)) return DICT_ERR1; | |||
140 | minimal = d->ht[0].used; | |||
141 | if (minimal < DICT_HT_INITIAL_SIZE4) | |||
142 | minimal = DICT_HT_INITIAL_SIZE4; | |||
143 | return dictExpand(d, minimal); | |||
144 | } | |||
145 | ||||
146 | /* Expand or create the hash table, | |||
147 | * when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1). | |||
148 | * Returns DICT_OK if expand was performed, and DICT_ERR if skipped. */ | |||
149 | int _dictExpand(dict *d, unsigned long size, int* malloc_failed) | |||
150 | { | |||
151 | if (malloc_failed) *malloc_failed = 0; | |||
152 | ||||
153 | /* the size is invalid if it is smaller than the number of | |||
154 | * elements already inside the hash table */ | |||
155 | if (dictIsRehashing(d)((d)->rehashidx != -1) || d->ht[0].used > size) | |||
156 | return DICT_ERR1; | |||
157 | ||||
158 | dictht n; /* the new hash table */ | |||
159 | unsigned long realsize = _dictNextPower(size); | |||
160 | ||||
161 | /* Rehashing to the same table size is not useful. */ | |||
162 | if (realsize == d->ht[0].size) return DICT_ERR1; | |||
163 | ||||
164 | /* Allocate the new hash table and initialize all pointers to NULL */ | |||
165 | n.size = realsize; | |||
166 | n.sizemask = realsize-1; | |||
167 | if (malloc_failed) { | |||
168 | n.table = ztrycalloc(realsize*sizeof(dictEntry*)); | |||
169 | *malloc_failed = n.table == NULL((void*)0); | |||
170 | if (*malloc_failed) | |||
171 | return DICT_ERR1; | |||
172 | } else | |||
173 | n.table = zcalloc(realsize*sizeof(dictEntry*)); | |||
174 | ||||
175 | n.used = 0; | |||
176 | ||||
177 | /* Is this the first initialization? If so it's not really a rehashing | |||
178 | * we just set the first hash table so that it can accept keys. */ | |||
179 | if (d->ht[0].table == NULL((void*)0)) { | |||
180 | d->ht[0] = n; | |||
181 | return DICT_OK0; | |||
182 | } | |||
183 | ||||
184 | /* Prepare a second hash table for incremental rehashing */ | |||
185 | d->ht[1] = n; | |||
186 | d->rehashidx = 0; | |||
187 | return DICT_OK0; | |||
188 | } | |||
189 | ||||
190 | /* return DICT_ERR if expand was not performed */ | |||
191 | int dictExpand(dict *d, unsigned long size) { | |||
192 | return _dictExpand(d, size, NULL((void*)0)); | |||
193 | } | |||
194 | ||||
195 | /* return DICT_ERR if expand failed due to memory allocation failure */ | |||
196 | int dictTryExpand(dict *d, unsigned long size) { | |||
197 | int malloc_failed; | |||
198 | _dictExpand(d, size, &malloc_failed); | |||
199 | return malloc_failed? DICT_ERR1 : DICT_OK0; | |||
200 | } | |||
201 | ||||
202 | /* Performs N steps of incremental rehashing. Returns 1 if there are still | |||
203 | * keys to move from the old to the new hash table, otherwise 0 is returned. | |||
204 | * | |||
205 | * Note that a rehashing step consists in moving a bucket (that may have more | |||
206 | * than one key as we use chaining) from the old to the new hash table, however | |||
207 | * since part of the hash table may be composed of empty spaces, it is not | |||
208 | * guaranteed that this function will rehash even a single bucket, since it | |||
209 | * will visit at max N*10 empty buckets in total, otherwise the amount of | |||
210 | * work it does would be unbound and the function may block for a long time. */ | |||
211 | int dictRehash(dict *d, int n) { | |||
212 | int empty_visits = n*10; /* Max number of empty buckets to visit. */ | |||
213 | if (!dictIsRehashing(d)((d)->rehashidx != -1)) return 0; | |||
214 | ||||
215 | while(n-- && d->ht[0].used != 0) { | |||
216 | dictEntry *de, *nextde; | |||
217 | ||||
218 | /* Note that rehashidx can't overflow as we are sure there are more | |||
219 | * elements because ht[0].used != 0 */ | |||
220 | assert(d->ht[0].size > (unsigned long)d->rehashidx)(__builtin_expect(!!((d->ht[0].size > (unsigned long)d-> rehashidx)), 1)?(void)0 : (_serverAssert("d->ht[0].size > (unsigned long)d->rehashidx" ,"dict.c",220),__builtin_unreachable())); | |||
221 | while(d->ht[0].table[d->rehashidx] == NULL((void*)0)) { | |||
222 | d->rehashidx++; | |||
223 | if (--empty_visits == 0) return 1; | |||
224 | } | |||
225 | de = d->ht[0].table[d->rehashidx]; | |||
226 | /* Move all the keys in this bucket from the old to the new hash HT */ | |||
227 | while(de) { | |||
228 | uint64_t h; | |||
229 | ||||
230 | nextde = de->next; | |||
231 | /* Get the index in the new hash table */ | |||
232 | h = dictHashKey(d, de->key)(d)->type->hashFunction(de->key) & d->ht[1].sizemask; | |||
233 | de->next = d->ht[1].table[h]; | |||
234 | d->ht[1].table[h] = de; | |||
235 | d->ht[0].used--; | |||
236 | d->ht[1].used++; | |||
237 | de = nextde; | |||
238 | } | |||
239 | d->ht[0].table[d->rehashidx] = NULL((void*)0); | |||
240 | d->rehashidx++; | |||
241 | } | |||
242 | ||||
243 | /* Check if we already rehashed the whole table... */ | |||
244 | if (d->ht[0].used == 0) { | |||
245 | zfree(d->ht[0].table); | |||
246 | d->ht[0] = d->ht[1]; | |||
247 | _dictReset(&d->ht[1]); | |||
248 | d->rehashidx = -1; | |||
249 | return 0; | |||
250 | } | |||
251 | ||||
252 | /* More to rehash... */ | |||
253 | return 1; | |||
254 | } | |||
255 | ||||
/* Return the current wall-clock time expressed in milliseconds. */
long long timeInMilliseconds(void) {
    struct timeval now;

    gettimeofday(&now, NULL);
    return ((long long)now.tv_sec) * 1000 + now.tv_usec / 1000;
}
262 | ||||
263 | /* Rehash in ms+"delta" milliseconds. The value of "delta" is larger | |||
264 | * than 0, and is smaller than 1 in most cases. The exact upper bound | |||
265 | * depends on the running time of dictRehash(d,100).*/ | |||
266 | int dictRehashMilliseconds(dict *d, int ms) { | |||
267 | if (d->pauserehash > 0) return 0; | |||
268 | ||||
269 | long long start = timeInMilliseconds(); | |||
270 | int rehashes = 0; | |||
271 | ||||
272 | while(dictRehash(d,100)) { | |||
273 | rehashes += 100; | |||
274 | if (timeInMilliseconds()-start > ms) break; | |||
275 | } | |||
276 | return rehashes; | |||
277 | } | |||
278 | ||||
279 | /* This function performs just a step of rehashing, and only if hashing has | |||
280 | * not been paused for our hash table. When we have iterators in the | |||
281 | * middle of a rehashing we can't mess with the two hash tables otherwise | |||
282 | * some element can be missed or duplicated. | |||
283 | * | |||
284 | * This function is called by common lookup or update operations in the | |||
285 | * dictionary so that the hash table automatically migrates from H1 to H2 | |||
286 | * while it is actively used. */ | |||
287 | static void _dictRehashStep(dict *d) { | |||
288 | if (d->pauserehash == 0) dictRehash(d,1); | |||
289 | } | |||
290 | ||||
291 | /* Add an element to the target hash table */ | |||
292 | int dictAdd(dict *d, void *key, void *val) | |||
293 | { | |||
294 | dictEntry *entry = dictAddRaw(d,key,NULL((void*)0)); | |||
295 | ||||
296 | if (!entry) return DICT_ERR1; | |||
297 | dictSetVal(d, entry, val)do { if ((d)->type->valDup) (entry)->v.val = (d)-> type->valDup((d)->privdata, val); else (entry)->v.val = (val); } while(0); | |||
298 | return DICT_OK0; | |||
299 | } | |||
300 | ||||
301 | /* Low level add or find: | |||
302 | * This function adds the entry but instead of setting a value returns the | |||
303 | * dictEntry structure to the user, that will make sure to fill the value | |||
304 | * field as they wish. | |||
305 | * | |||
306 | * This function is also directly exposed to the user API to be called | |||
307 | * mainly in order to store non-pointers inside the hash value, example: | |||
308 | * | |||
309 | * entry = dictAddRaw(dict,mykey,NULL); | |||
310 | * if (entry != NULL) dictSetSignedIntegerVal(entry,1000); | |||
311 | * | |||
312 | * Return values: | |||
313 | * | |||
314 | * If key already exists NULL is returned, and "*existing" is populated | |||
315 | * with the existing entry if existing is not NULL. | |||
316 | * | |||
317 | * If key was added, the hash entry is returned to be manipulated by the caller. | |||
318 | */ | |||
319 | dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) | |||
320 | { | |||
321 | long index; | |||
322 | dictEntry *entry; | |||
323 | dictht *ht; | |||
324 | ||||
325 | if (dictIsRehashing(d)((d)->rehashidx != -1)) _dictRehashStep(d); | |||
326 | ||||
327 | /* Get the index of the new element, or -1 if | |||
328 | * the element already exists. */ | |||
329 | if ((index = _dictKeyIndex(d, key, dictHashKey(d,key)(d)->type->hashFunction(key), existing)) == -1) | |||
330 | return NULL((void*)0); | |||
331 | ||||
332 | /* Allocate the memory and store the new entry. | |||
333 | * Insert the element in top, with the assumption that in a database | |||
334 | * system it is more likely that recently added entries are accessed | |||
335 | * more frequently. */ | |||
336 | ht = dictIsRehashing(d)((d)->rehashidx != -1) ? &d->ht[1] : &d->ht[0]; | |||
337 | entry = zmalloc(sizeof(*entry)); | |||
338 | entry->next = ht->table[index]; | |||
339 | ht->table[index] = entry; | |||
340 | ht->used++; | |||
341 | ||||
342 | /* Set the hash entry fields. */ | |||
343 | dictSetKey(d, entry, key)do { if ((d)->type->keyDup) (entry)->key = (d)->type ->keyDup((d)->privdata, key); else (entry)->key = (key ); } while(0); | |||
344 | return entry; | |||
345 | } | |||
346 | ||||
347 | /* Add or Overwrite: | |||
348 | * Add an element, discarding the old value if the key already exists. | |||
349 | * Return 1 if the key was added from scratch, 0 if there was already an | |||
350 | * element with such key and dictReplace() just performed a value update | |||
351 | * operation. */ | |||
352 | int dictReplace(dict *d, void *key, void *val) | |||
353 | { | |||
354 | dictEntry *entry, *existing, auxentry; | |||
355 | ||||
356 | /* Try to add the element. If the key | |||
357 | * does not exists dictAdd will succeed. */ | |||
358 | entry = dictAddRaw(d,key,&existing); | |||
359 | if (entry) { | |||
360 | dictSetVal(d, entry, val)do { if ((d)->type->valDup) (entry)->v.val = (d)-> type->valDup((d)->privdata, val); else (entry)->v.val = (val); } while(0); | |||
361 | return 1; | |||
362 | } | |||
363 | ||||
364 | /* Set the new value and free the old one. Note that it is important | |||
365 | * to do that in this order, as the value may just be exactly the same | |||
366 | * as the previous one. In this context, think to reference counting, | |||
367 | * you want to increment (set), and then decrement (free), and not the | |||
368 | * reverse. */ | |||
369 | auxentry = *existing; | |||
370 | dictSetVal(d, existing, val)do { if ((d)->type->valDup) (existing)->v.val = (d)-> type->valDup((d)->privdata, val); else (existing)->v .val = (val); } while(0); | |||
371 | dictFreeVal(d, &auxentry)if ((d)->type->valDestructor) (d)->type->valDestructor ((d)->privdata, (&auxentry)->v.val); | |||
372 | return 0; | |||
373 | } | |||
374 | ||||
375 | /* Add or Find: | |||
376 | * dictAddOrFind() is simply a version of dictAddRaw() that always | |||
377 | * returns the hash entry of the specified key, even if the key already | |||
378 | * exists and can't be added (in that case the entry of the already | |||
379 | * existing key is returned.) | |||
380 | * | |||
381 | * See dictAddRaw() for more information. */ | |||
382 | dictEntry *dictAddOrFind(dict *d, void *key) { | |||
383 | dictEntry *entry, *existing; | |||
384 | entry = dictAddRaw(d,key,&existing); | |||
385 | return entry ? entry : existing; | |||
386 | } | |||
387 | ||||
388 | /* Search and remove an element. This is an helper function for | |||
389 | * dictDelete() and dictUnlink(), please check the top comment | |||
390 | * of those functions. */ | |||
391 | static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { | |||
392 | uint64_t h, idx; | |||
393 | dictEntry *he, *prevHe; | |||
394 | int table; | |||
395 | ||||
396 | if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL((void*)0); | |||
397 | ||||
398 | if (dictIsRehashing(d)((d)->rehashidx != -1)) _dictRehashStep(d); | |||
399 | h = dictHashKey(d, key)(d)->type->hashFunction(key); | |||
400 | ||||
401 | for (table = 0; table <= 1; table++) { | |||
402 | idx = h & d->ht[table].sizemask; | |||
403 | he = d->ht[table].table[idx]; | |||
404 | prevHe = NULL((void*)0); | |||
405 | while(he) { | |||
406 | if (key==he->key || dictCompareKeys(d, key, he->key)(((d)->type->keyCompare) ? (d)->type->keyCompare( (d)->privdata, key, he->key) : (key) == (he->key))) { | |||
407 | /* Unlink the element from the list */ | |||
408 | if (prevHe) | |||
409 | prevHe->next = he->next; | |||
410 | else | |||
411 | d->ht[table].table[idx] = he->next; | |||
412 | if (!nofree) { | |||
413 | dictFreeKey(d, he)if ((d)->type->keyDestructor) (d)->type->keyDestructor ((d)->privdata, (he)->key); | |||
414 | dictFreeVal(d, he)if ((d)->type->valDestructor) (d)->type->valDestructor ((d)->privdata, (he)->v.val); | |||
415 | zfree(he); | |||
416 | } | |||
417 | d->ht[table].used--; | |||
418 | return he; | |||
419 | } | |||
420 | prevHe = he; | |||
421 | he = he->next; | |||
422 | } | |||
423 | if (!dictIsRehashing(d)((d)->rehashidx != -1)) break; | |||
424 | } | |||
425 | return NULL((void*)0); /* not found */ | |||
426 | } | |||
427 | ||||
428 | /* Remove an element, returning DICT_OK on success or DICT_ERR if the | |||
429 | * element was not found. */ | |||
430 | int dictDelete(dict *ht, const void *key) { | |||
431 | return dictGenericDelete(ht,key,0) ? DICT_OK0 : DICT_ERR1; | |||
432 | } | |||
433 | ||||
434 | /* Remove an element from the table, but without actually releasing | |||
435 | * the key, value and dictionary entry. The dictionary entry is returned | |||
436 | * if the element was found (and unlinked from the table), and the user | |||
437 | * should later call `dictFreeUnlinkedEntry()` with it in order to release it. | |||
438 | * Otherwise if the key is not found, NULL is returned. | |||
439 | * | |||
440 | * This function is useful when we want to remove something from the hash | |||
441 | * table but want to use its value before actually deleting the entry. | |||
442 | * Without this function the pattern would require two lookups: | |||
443 | * | |||
444 | * entry = dictFind(...); | |||
445 | * // Do something with entry | |||
446 | * dictDelete(dictionary,entry); | |||
447 | * | |||
448 | * Thanks to this function it is possible to avoid this, and use | |||
449 | * instead: | |||
450 | * | |||
451 | * entry = dictUnlink(dictionary,entry); | |||
452 | * // Do something with entry | |||
453 | * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again. | |||
454 | */ | |||
455 | dictEntry *dictUnlink(dict *ht, const void *key) { | |||
456 | return dictGenericDelete(ht,key,1); | |||
457 | } | |||
458 | ||||
459 | /* You need to call this function to really free the entry after a call | |||
460 | * to dictUnlink(). It's safe to call this function with 'he' = NULL. */ | |||
461 | void dictFreeUnlinkedEntry(dict *d, dictEntry *he) { | |||
462 | if (he == NULL((void*)0)) return; | |||
463 | dictFreeKey(d, he)if ((d)->type->keyDestructor) (d)->type->keyDestructor ((d)->privdata, (he)->key); | |||
464 | dictFreeVal(d, he)if ((d)->type->valDestructor) (d)->type->valDestructor ((d)->privdata, (he)->v.val); | |||
465 | zfree(he); | |||
466 | } | |||
467 | ||||
468 | /* Destroy an entire dictionary */ | |||
469 | int _dictClear(dict *d, dictht *ht, void(callback)(void *)) { | |||
470 | unsigned long i; | |||
471 | ||||
472 | /* Free all the elements */ | |||
473 | for (i = 0; i < ht->size && ht->used > 0; i++) { | |||
474 | dictEntry *he, *nextHe; | |||
475 | ||||
476 | if (callback && (i & 65535) == 0) callback(d->privdata); | |||
477 | ||||
478 | if ((he = ht->table[i]) == NULL((void*)0)) continue; | |||
479 | while(he) { | |||
480 | nextHe = he->next; | |||
481 | dictFreeKey(d, he)if ((d)->type->keyDestructor) (d)->type->keyDestructor ((d)->privdata, (he)->key); | |||
482 | dictFreeVal(d, he)if ((d)->type->valDestructor) (d)->type->valDestructor ((d)->privdata, (he)->v.val); | |||
483 | zfree(he); | |||
484 | ht->used--; | |||
485 | he = nextHe; | |||
486 | } | |||
487 | } | |||
488 | /* Free the table and the allocated cache structure */ | |||
489 | zfree(ht->table); | |||
490 | /* Re-initialize the table */ | |||
491 | _dictReset(ht); | |||
492 | return DICT_OK0; /* never fails */ | |||
493 | } | |||
494 | ||||
495 | /* Clear & Release the hash table */ | |||
496 | void dictRelease(dict *d) | |||
497 | { | |||
498 | _dictClear(d,&d->ht[0],NULL((void*)0)); | |||
499 | _dictClear(d,&d->ht[1],NULL((void*)0)); | |||
500 | zfree(d); | |||
501 | } | |||
502 | ||||
503 | dictEntry *dictFind(dict *d, const void *key) | |||
504 | { | |||
505 | dictEntry *he; | |||
506 | uint64_t h, idx, table; | |||
507 | ||||
508 | if (dictSize(d)((d)->ht[0].used+(d)->ht[1].used) == 0) return NULL((void*)0); /* dict is empty */ | |||
509 | if (dictIsRehashing(d)((d)->rehashidx != -1)) _dictRehashStep(d); | |||
510 | h = dictHashKey(d, key)(d)->type->hashFunction(key); | |||
511 | for (table = 0; table <= 1; table++) { | |||
512 | idx = h & d->ht[table].sizemask; | |||
513 | he = d->ht[table].table[idx]; | |||
514 | while(he) { | |||
515 | if (key==he->key || dictCompareKeys(d, key, he->key)(((d)->type->keyCompare) ? (d)->type->keyCompare( (d)->privdata, key, he->key) : (key) == (he->key))) | |||
516 | return he; | |||
517 | he = he->next; | |||
518 | } | |||
519 | if (!dictIsRehashing(d)((d)->rehashidx != -1)) return NULL((void*)0); | |||
520 | } | |||
521 | return NULL((void*)0); | |||
522 | } | |||
523 | ||||
524 | void *dictFetchValue(dict *d, const void *key) { | |||
525 | dictEntry *he; | |||
526 | ||||
527 | he = dictFind(d,key); | |||
528 | return he ? dictGetVal(he)((he)->v.val) : NULL((void*)0); | |||
529 | } | |||
530 | ||||
531 | /* A fingerprint is a 64 bit number that represents the state of the dictionary | |||
532 | * at a given time, it's just a few dict properties xored together. | |||
533 | * When an unsafe iterator is initialized, we get the dict fingerprint, and check | |||
534 | * the fingerprint again when the iterator is released. | |||
535 | * If the two fingerprints are different it means that the user of the iterator | |||
536 | * performed forbidden operations against the dictionary while iterating. */ | |||
537 | long long dictFingerprint(dict *d) { | |||
538 | long long integers[6], hash = 0; | |||
539 | int j; | |||
540 | ||||
541 | integers[0] = (long) d->ht[0].table; | |||
542 | integers[1] = d->ht[0].size; | |||
543 | integers[2] = d->ht[0].used; | |||
544 | integers[3] = (long) d->ht[1].table; | |||
545 | integers[4] = d->ht[1].size; | |||
546 | integers[5] = d->ht[1].used; | |||
547 | ||||
548 | /* We hash N integers by summing every successive integer with the integer | |||
549 | * hashing of the previous sum. Basically: | |||
550 | * | |||
551 | * Result = hash(hash(hash(int1)+int2)+int3) ... | |||
552 | * | |||
553 | * This way the same set of integers in a different order will (likely) hash | |||
554 | * to a different number. */ | |||
555 | for (j = 0; j < 6; j++) { | |||
556 | hash += integers[j]; | |||
557 | /* For the hashing step we use Tomas Wang's 64 bit integer hash. */ | |||
558 | hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1; | |||
559 | hash = hash ^ (hash >> 24); | |||
560 | hash = (hash + (hash << 3)) + (hash << 8); // hash * 265 | |||
561 | hash = hash ^ (hash >> 14); | |||
562 | hash = (hash + (hash << 2)) + (hash << 4); // hash * 21 | |||
563 | hash = hash ^ (hash >> 28); | |||
564 | hash = hash + (hash << 31); | |||
565 | } | |||
566 | return hash; | |||
567 | } | |||
568 | ||||
569 | dictIterator *dictGetIterator(dict *d) | |||
570 | { | |||
571 | dictIterator *iter = zmalloc(sizeof(*iter)); | |||
572 | ||||
573 | iter->d = d; | |||
574 | iter->table = 0; | |||
575 | iter->index = -1; | |||
576 | iter->safe = 0; | |||
577 | iter->entry = NULL((void*)0); | |||
578 | iter->nextEntry = NULL((void*)0); | |||
579 | return iter; | |||
580 | } | |||
581 | ||||
582 | dictIterator *dictGetSafeIterator(dict *d) { | |||
583 | dictIterator *i = dictGetIterator(d); | |||
584 | ||||
585 | i->safe = 1; | |||
586 | return i; | |||
587 | } | |||
588 | ||||
589 | dictEntry *dictNext(dictIterator *iter) | |||
590 | { | |||
591 | while (1) { | |||
592 | if (iter->entry == NULL((void*)0)) { | |||
593 | dictht *ht = &iter->d->ht[iter->table]; | |||
594 | if (iter->index == -1 && iter->table == 0) { | |||
595 | if (iter->safe) | |||
596 | dictPauseRehashing(iter->d)(iter->d)->pauserehash++; | |||
597 | else | |||
598 | iter->fingerprint = dictFingerprint(iter->d); | |||
599 | } | |||
600 | iter->index++; | |||
601 | if (iter->index >= (long) ht->size) { | |||
602 | if (dictIsRehashing(iter->d)((iter->d)->rehashidx != -1) && iter->table == 0) { | |||
603 | iter->table++; | |||
604 | iter->index = 0; | |||
605 | ht = &iter->d->ht[1]; | |||
606 | } else { | |||
607 | break; | |||
608 | } | |||
609 | } | |||
610 | iter->entry = ht->table[iter->index]; | |||
611 | } else { | |||
612 | iter->entry = iter->nextEntry; | |||
613 | } | |||
614 | if (iter->entry) { | |||
615 | /* We need to save the 'next' here, the iterator user | |||
616 | * may delete the entry we are returning. */ | |||
617 | iter->nextEntry = iter->entry->next; | |||
618 | return iter->entry; | |||
619 | } | |||
620 | } | |||
621 | return NULL((void*)0); | |||
622 | } | |||
623 | ||||
624 | void dictReleaseIterator(dictIterator *iter) | |||
625 | { | |||
626 | if (!(iter->index == -1 && iter->table == 0)) { | |||
627 | if (iter->safe) | |||
628 | dictResumeRehashing(iter->d)(iter->d)->pauserehash--; | |||
629 | else | |||
630 | assert(iter->fingerprint == dictFingerprint(iter->d))(__builtin_expect(!!((iter->fingerprint == dictFingerprint (iter->d))), 1)?(void)0 : (_serverAssert("iter->fingerprint == dictFingerprint(iter->d)" ,"dict.c",630),__builtin_unreachable())); | |||
631 | } | |||
632 | zfree(iter); | |||
633 | } | |||
634 | ||||
635 | /* Return a random entry from the hash table. Useful to | |||
636 | * implement randomized algorithms */ | |||
637 | dictEntry *dictGetRandomKey(dict *d) | |||
638 | { | |||
639 | dictEntry *he, *orighe; | |||
640 | unsigned long h; | |||
641 | int listlen, listele; | |||
642 | ||||
643 | if (dictSize(d)((d)->ht[0].used+(d)->ht[1].used) == 0) return NULL((void*)0); | |||
644 | if (dictIsRehashing(d)((d)->rehashidx != -1)) _dictRehashStep(d); | |||
645 | if (dictIsRehashing(d)((d)->rehashidx != -1)) { | |||
646 | do { | |||
647 | /* We are sure there are no elements in indexes from 0 | |||
648 | * to rehashidx-1 */ | |||
649 | h = d->rehashidx + (randomULong()((unsigned long) genrand64_int64()) % (dictSlots(d)((d)->ht[0].size+(d)->ht[1].size) - d->rehashidx)); | |||
650 | he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] : | |||
651 | d->ht[0].table[h]; | |||
652 | } while(he == NULL((void*)0)); | |||
653 | } else { | |||
654 | do { | |||
655 | h = randomULong()((unsigned long) genrand64_int64()) & d->ht[0].sizemask; | |||
656 | he = d->ht[0].table[h]; | |||
657 | } while(he == NULL((void*)0)); | |||
658 | } | |||
659 | ||||
660 | /* Now we found a non empty bucket, but it is a linked | |||
661 | * list and we need to get a random element from the list. | |||
662 | * The only sane way to do so is counting the elements and | |||
663 | * select a random index. */ | |||
664 | listlen = 0; | |||
665 | orighe = he; | |||
666 | while(he) { | |||
667 | he = he->next; | |||
668 | listlen++; | |||
669 | } | |||
670 | listele = random() % listlen; | |||
671 | he = orighe; | |||
672 | while(listele--) he = he->next; | |||
| ||||
673 | return he; | |||
674 | } | |||
675 | ||||
676 | /* This function samples the dictionary to return a few keys from random | |||
677 | * locations. | |||
678 | * | |||
679 | * It does not guarantee to return all the keys specified in 'count', nor | |||
680 | * it does guarantee to return non-duplicated elements, however it will make | |||
681 | * some effort to do both things. | |||
682 | * | |||
683 | * Returned pointers to hash table entries are stored into 'des' that | |||
684 | * points to an array of dictEntry pointers. The array must have room for | |||
685 | * at least 'count' elements, that is the argument we pass to the function | |||
686 | * to tell how many random elements we need. | |||
687 | * | |||
688 | * The function returns the number of items stored into 'des', that may | |||
689 | * be less than 'count' if the hash table has less than 'count' elements | |||
690 | * inside, or if not enough elements were found in a reasonable amount of | |||
691 | * steps. | |||
692 | * | |||
693 | * Note that this function is not suitable when you need a good distribution | |||
694 | * of the returned items, but only when you need to "sample" a given number | |||
695 | * of continuous elements to run some kind of algorithm or to produce | |||
696 | * statistics. However the function is much faster than dictGetRandomKey() | |||
697 | * at producing N elements. */ | |||
698 | unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) { | |||
699 | unsigned long j; /* internal hash table id, 0 or 1. */ | |||
700 | unsigned long tables; /* 1 or 2 tables? */ | |||
701 | unsigned long stored = 0, maxsizemask; | |||
702 | unsigned long maxsteps; | |||
703 | ||||
704 | if (dictSize(d)((d)->ht[0].used+(d)->ht[1].used) < count) count = dictSize(d)((d)->ht[0].used+(d)->ht[1].used); | |||
705 | maxsteps = count*10; | |||
706 | ||||
707 | /* Try to do a rehashing work proportional to 'count'. */ | |||
708 | for (j = 0; j < count; j++) { | |||
709 | if (dictIsRehashing(d)((d)->rehashidx != -1)) | |||
710 | _dictRehashStep(d); | |||
711 | else | |||
712 | break; | |||
713 | } | |||
714 | ||||
715 | tables = dictIsRehashing(d)((d)->rehashidx != -1) ? 2 : 1; | |||
716 | maxsizemask = d->ht[0].sizemask; | |||
717 | if (tables > 1 && maxsizemask < d->ht[1].sizemask) | |||
718 | maxsizemask = d->ht[1].sizemask; | |||
719 | ||||
720 | /* Pick a random point inside the larger table. */ | |||
721 | unsigned long i = randomULong()((unsigned long) genrand64_int64()) & maxsizemask; | |||
722 | unsigned long emptylen = 0; /* Continuous empty entries so far. */ | |||
723 | while(stored < count && maxsteps--) { | |||
724 | for (j = 0; j < tables; j++) { | |||
725 | /* Invariant of the dict.c rehashing: up to the indexes already | |||
726 | * visited in ht[0] during the rehashing, there are no populated | |||
727 | * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */ | |||
728 | if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) { | |||
729 | /* Moreover, if we are currently out of range in the second | |||
730 | * table, there will be no elements in both tables up to | |||
731 | * the current rehashing index, so we jump if possible. | |||
732 | * (this happens when going from big to small table). */ | |||
733 | if (i >= d->ht[1].size) | |||
734 | i = d->rehashidx; | |||
735 | else | |||
736 | continue; | |||
737 | } | |||
738 | if (i >= d->ht[j].size) continue; /* Out of range for this table. */ | |||
739 | dictEntry *he = d->ht[j].table[i]; | |||
740 | ||||
741 | /* Count contiguous empty buckets, and jump to other | |||
742 | * locations if they reach 'count' (with a minimum of 5). */ | |||
743 | if (he == NULL((void*)0)) { | |||
744 | emptylen++; | |||
745 | if (emptylen >= 5 && emptylen > count) { | |||
746 | i = randomULong()((unsigned long) genrand64_int64()) & maxsizemask; | |||
747 | emptylen = 0; | |||
748 | } | |||
749 | } else { | |||
750 | emptylen = 0; | |||
751 | while (he) { | |||
752 | /* Collect all the elements of the buckets found non | |||
753 | * empty while iterating. */ | |||
754 | *des = he; | |||
755 | des++; | |||
756 | he = he->next; | |||
757 | stored++; | |||
758 | if (stored == count) return stored; | |||
759 | } | |||
760 | } | |||
761 | } | |||
762 | i = (i+1) & maxsizemask; | |||
763 | } | |||
764 | return stored; | |||
765 | } | |||
766 | ||||
767 | /* This is like dictGetRandomKey() from the POV of the API, but will do more | |||
768 | * work to ensure a better distribution of the returned element. | |||
769 | * | |||
770 | * This function improves the distribution because the dictGetRandomKey() | |||
771 | * problem is that it selects a random bucket, then it selects a random | |||
772 | * element from the chain in the bucket. However elements being in different | |||
773 | * chain lengths will have different probabilities of being reported. With | |||
774 | * this function instead what we do is to consider a "linear" range of the table | |||
775 | * that may be constituted of N buckets with chains of different lengths | |||
776 | * appearing one after the other. Then we report a random element in the range. | |||
777 | * In this way we smooth away the problem of different chain lengths. */ | |||
778 | #define GETFAIR_NUM_ENTRIES15 15 | |||
779 | dictEntry *dictGetFairRandomKey(dict *d) { | |||
780 | dictEntry *entries[GETFAIR_NUM_ENTRIES15]; | |||
781 | unsigned int count = dictGetSomeKeys(d,entries,GETFAIR_NUM_ENTRIES15); | |||
782 | /* Note that dictGetSomeKeys() may return zero elements in an unlucky | |||
783 | * run() even if there are actually elements inside the hash table. So | |||
784 | * when we get zero, we call the true dictGetRandomKey() that will always | |||
785 | * yield the element if the hash table has at least one. */ | |||
786 | if (count == 0) return dictGetRandomKey(d); | |||
| ||||
787 | unsigned int idx = rand() % count; | |||
788 | return entries[idx]; | |||
789 | } | |||
790 | ||||
/* Reverse the bits of 'v' (e.g. bit 0 swaps with the top bit).
 * Parallel swap algorithm from:
 * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
static unsigned long rev(unsigned long v) {
    unsigned long half = CHAR_BIT * sizeof(v); /* bit width; must be a power of 2 */
    unsigned long m = ~0UL;
    /* At each step swap adjacent groups of 'half' bits, halving the
     * group size until single bits have been exchanged. */
    for (half >>= 1; half > 0; half >>= 1) {
        m ^= (m << half);
        v = ((v >> half) & m) | ((v << half) & ~m);
    }
    return v;
}
802 | ||||
803 | /* dictScan() is used to iterate over the elements of a dictionary. | |||
804 | * | |||
805 | * Iterating works the following way: | |||
806 | * | |||
807 | * 1) Initially you call the function using a cursor (v) value of 0. | |||
808 | * 2) The function performs one step of the iteration, and returns the | |||
809 | * new cursor value you must use in the next call. | |||
810 | * 3) When the returned cursor is 0, the iteration is complete. | |||
811 | * | |||
812 | * The function guarantees all elements present in the | |||
813 | * dictionary get returned between the start and end of the iteration. | |||
814 | * However it is possible some elements get returned multiple times. | |||
815 | * | |||
816 | * For every element returned, the callback argument 'fn' is | |||
817 | * called with 'privdata' as first argument and the dictionary entry | |||
818 | * 'de' as second argument. | |||
819 | * | |||
820 | * HOW IT WORKS. | |||
821 | * | |||
822 | * The iteration algorithm was designed by Pieter Noordhuis. | |||
823 | * The main idea is to increment a cursor starting from the higher order | |||
824 | * bits. That is, instead of incrementing the cursor normally, the bits | |||
825 | * of the cursor are reversed, then the cursor is incremented, and finally | |||
826 | * the bits are reversed again. | |||
827 | * | |||
828 | * This strategy is needed because the hash table may be resized between | |||
829 | * iteration calls. | |||
830 | * | |||
831 | * dict.c hash tables are always power of two in size, and they | |||
832 | * use chaining, so the position of an element in a given table is given | |||
833 | * by computing the bitwise AND between Hash(key) and SIZE-1 | |||
834 | * (where SIZE-1 is always the mask that is equivalent to taking the rest | |||
835 | * of the division between the Hash of the key and SIZE). | |||
836 | * | |||
837 | * For example if the current hash table size is 16, the mask is | |||
838 | * (in binary) 1111. The position of a key in the hash table will always be | |||
839 | * the last four bits of the hash output, and so forth. | |||
840 | * | |||
841 | * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE? | |||
842 | * | |||
843 | * If the hash table grows, elements can go anywhere in one multiple of | |||
844 | * the old bucket: for example let's say we already iterated with | |||
845 | * a 4 bit cursor 1100 (the mask is 1111 because hash table size = 16). | |||
846 | * | |||
847 | * If the hash table will be resized to 64 elements, then the new mask will | |||
848 | * be 111111. The new buckets you obtain by substituting in ??1100 | |||
849 | * with either 0 or 1 can be targeted only by keys we already visited | |||
850 | * when scanning the bucket 1100 in the smaller hash table. | |||
851 | * | |||
852 | * By iterating the higher bits first, because of the inverted counter, the | |||
853 | * cursor does not need to restart if the table size gets bigger. It will | |||
854 | * continue iterating using cursors without '1100' at the end, and also | |||
855 | * without any other combination of the final 4 bits already explored. | |||
856 | * | |||
857 | * Similarly when the table size shrinks over time, for example going from | |||
858 | * 16 to 8, if a combination of the lower three bits (the mask for size 8 | |||
859 | * is 111) were already completely explored, it would not be visited again | |||
860 | * because we are sure we tried, for example, both 0111 and 1111 (all the | |||
861 | * variations of the higher bit) so we don't need to test it again. | |||
862 | * | |||
863 | * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING! | |||
864 | * | |||
865 | * Yes, this is true, but we always iterate the smaller table first, then | |||
866 | * we test all the expansions of the current cursor into the larger | |||
867 | * table. For example if the current cursor is 101 and we also have a | |||
868 | * larger table of size 16, we also test (0)101 and (1)101 inside the larger | |||
869 | * table. This reduces the problem back to having only one table, where | |||
870 | * the larger one, if it exists, is just an expansion of the smaller one. | |||
871 | * | |||
872 | * LIMITATIONS | |||
873 | * | |||
874 | * This iterator is completely stateless, and this is a huge advantage, | |||
875 | * including no additional memory used. | |||
876 | * | |||
877 | * The disadvantages resulting from this design are: | |||
878 | * | |||
879 | * 1) It is possible we return elements more than once. However this is usually | |||
880 | * easy to deal with in the application level. | |||
881 | * 2) The iterator must return multiple elements per call, as it needs to always | |||
882 | * return all the keys chained in a given bucket, and all the expansions, so | |||
883 | * we are sure we don't miss keys moving during rehashing. | |||
884 | * 3) The reverse cursor is somewhat hard to understand at first, but this | |||
885 | * comment is supposed to help. | |||
886 | */ | |||
887 | unsigned long dictScan(dict *d, | |||
888 | unsigned long v, | |||
889 | dictScanFunction *fn, | |||
890 | dictScanBucketFunction* bucketfn, | |||
891 | void *privdata) | |||
892 | { | |||
893 | dictht *t0, *t1; | |||
894 | const dictEntry *de, *next; | |||
895 | unsigned long m0, m1; | |||
896 | ||||
897 | if (dictSize(d)((d)->ht[0].used+(d)->ht[1].used) == 0) return 0; | |||
898 | ||||
899 | /* This is needed in case the scan callback tries to do dictFind or alike. */ | |||
900 | dictPauseRehashing(d)(d)->pauserehash++; | |||
901 | ||||
902 | if (!dictIsRehashing(d)((d)->rehashidx != -1)) { | |||
903 | t0 = &(d->ht[0]); | |||
904 | m0 = t0->sizemask; | |||
905 | ||||
906 | /* Emit entries at cursor */ | |||
907 | if (bucketfn) bucketfn(privdata, &t0->table[v & m0]); | |||
908 | de = t0->table[v & m0]; | |||
909 | while (de) { | |||
910 | next = de->next; | |||
911 | fn(privdata, de); | |||
912 | de = next; | |||
913 | } | |||
914 | ||||
915 | /* Set unmasked bits so incrementing the reversed cursor | |||
916 | * operates on the masked bits */ | |||
917 | v |= ~m0; | |||
918 | ||||
919 | /* Increment the reverse cursor */ | |||
920 | v = rev(v); | |||
921 | v++; | |||
922 | v = rev(v); | |||
923 | ||||
924 | } else { | |||
925 | t0 = &d->ht[0]; | |||
926 | t1 = &d->ht[1]; | |||
927 | ||||
928 | /* Make sure t0 is the smaller and t1 is the bigger table */ | |||
929 | if (t0->size > t1->size) { | |||
930 | t0 = &d->ht[1]; | |||
931 | t1 = &d->ht[0]; | |||
932 | } | |||
933 | ||||
934 | m0 = t0->sizemask; | |||
935 | m1 = t1->sizemask; | |||
936 | ||||
937 | /* Emit entries at cursor */ | |||
938 | if (bucketfn) bucketfn(privdata, &t0->table[v & m0]); | |||
939 | de = t0->table[v & m0]; | |||
940 | while (de) { | |||
941 | next = de->next; | |||
942 | fn(privdata, de); | |||
943 | de = next; | |||
944 | } | |||
945 | ||||
946 | /* Iterate over indices in larger table that are the expansion | |||
947 | * of the index pointed to by the cursor in the smaller table */ | |||
948 | do { | |||
949 | /* Emit entries at cursor */ | |||
950 | if (bucketfn) bucketfn(privdata, &t1->table[v & m1]); | |||
951 | de = t1->table[v & m1]; | |||
952 | while (de) { | |||
953 | next = de->next; | |||
954 | fn(privdata, de); | |||
955 | de = next; | |||
956 | } | |||
957 | ||||
958 | /* Increment the reverse cursor not covered by the smaller mask.*/ | |||
959 | v |= ~m1; | |||
960 | v = rev(v); | |||
961 | v++; | |||
962 | v = rev(v); | |||
963 | ||||
964 | /* Continue while bits covered by mask difference is non-zero */ | |||
965 | } while (v & (m0 ^ m1)); | |||
966 | } | |||
967 | ||||
968 | dictResumeRehashing(d)(d)->pauserehash--; | |||
969 | ||||
970 | return v; | |||
971 | } | |||
972 | ||||
973 | /* ------------------------- private functions ------------------------------ */ | |||
974 | ||||
975 | /* Because we may need to allocate huge memory chunk at once when dict | |||
976 | * expands, we will check this allocation is allowed or not if the dict | |||
977 | * type has expandAllowed member function. */ | |||
978 | static int dictTypeExpandAllowed(dict *d) { | |||
979 | if (d->type->expandAllowed == NULL((void*)0)) return 1; | |||
980 | return d->type->expandAllowed( | |||
981 | _dictNextPower(d->ht[0].used + 1) * sizeof(dictEntry*), | |||
982 | (double)d->ht[0].used / d->ht[0].size); | |||
983 | } | |||
984 | ||||
985 | /* Expand the hash table if needed */ | |||
986 | static int _dictExpandIfNeeded(dict *d) | |||
987 | { | |||
988 | /* Incremental rehashing already in progress. Return. */ | |||
989 | if (dictIsRehashing(d)((d)->rehashidx != -1)) return DICT_OK0; | |||
990 | ||||
991 | /* If the hash table is empty expand it to the initial size. */ | |||
992 | if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE4); | |||
993 | ||||
994 | /* If we reached the 1:1 ratio, and we are allowed to resize the hash | |||
995 | * table (global setting) or we should avoid it but the ratio between | |||
996 | * elements/buckets is over the "safe" threshold, we resize doubling | |||
997 | * the number of buckets. */ | |||
998 | if (d->ht[0].used >= d->ht[0].size && | |||
999 | (dict_can_resize || | |||
1000 | d->ht[0].used/d->ht[0].size > dict_force_resize_ratio) && | |||
1001 | dictTypeExpandAllowed(d)) | |||
1002 | { | |||
1003 | return dictExpand(d, d->ht[0].used + 1); | |||
1004 | } | |||
1005 | return DICT_OK0; | |||
1006 | } | |||
1007 | ||||
1008 | /* Our hash table capability is a power of two */ | |||
1009 | static unsigned long _dictNextPower(unsigned long size) | |||
1010 | { | |||
1011 | unsigned long i = DICT_HT_INITIAL_SIZE4; | |||
1012 | ||||
1013 | if (size >= LONG_MAX9223372036854775807L) return LONG_MAX9223372036854775807L + 1LU; | |||
1014 | while(1) { | |||
1015 | if (i >= size) | |||
1016 | return i; | |||
1017 | i *= 2; | |||
1018 | } | |||
1019 | } | |||
1020 | ||||
1021 | /* Returns the index of a free slot that can be populated with | |||
1022 | * a hash entry for the given 'key'. | |||
1023 | * If the key already exists, -1 is returned | |||
1024 | * and the optional output parameter may be filled. | |||
1025 | * | |||
1026 | * Note that if we are in the process of rehashing the hash table, the | |||
1027 | * index is always returned in the context of the second (new) hash table. */ | |||
1028 | static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing) | |||
1029 | { | |||
1030 | unsigned long idx, table; | |||
1031 | dictEntry *he; | |||
1032 | if (existing) *existing = NULL((void*)0); | |||
1033 | ||||
1034 | /* Expand the hash table if needed */ | |||
1035 | if (_dictExpandIfNeeded(d) == DICT_ERR1) | |||
1036 | return -1; | |||
1037 | for (table = 0; table <= 1; table++) { | |||
1038 | idx = hash & d->ht[table].sizemask; | |||
1039 | /* Search if this slot does not already contain the given key */ | |||
1040 | he = d->ht[table].table[idx]; | |||
1041 | while(he) { | |||
1042 | if (key==he->key || dictCompareKeys(d, key, he->key)(((d)->type->keyCompare) ? (d)->type->keyCompare( (d)->privdata, key, he->key) : (key) == (he->key))) { | |||
1043 | if (existing) *existing = he; | |||
1044 | return -1; | |||
1045 | } | |||
1046 | he = he->next; | |||
1047 | } | |||
1048 | if (!dictIsRehashing(d)((d)->rehashidx != -1)) break; | |||
1049 | } | |||
1050 | return idx; | |||
1051 | } | |||
1052 | ||||
1053 | void dictEmpty(dict *d, void(callback)(void*)) { | |||
1054 | _dictClear(d,&d->ht[0],callback); | |||
1055 | _dictClear(d,&d->ht[1],callback); | |||
1056 | d->rehashidx = -1; | |||
1057 | d->pauserehash = 0; | |||
1058 | } | |||
1059 | ||||
1060 | void dictEnableResize(void) { | |||
1061 | dict_can_resize = 1; | |||
1062 | } | |||
1063 | ||||
1064 | void dictDisableResize(void) { | |||
1065 | dict_can_resize = 0; | |||
1066 | } | |||
1067 | ||||
1068 | uint64_t dictGetHash(dict *d, const void *key) { | |||
1069 | return dictHashKey(d, key)(d)->type->hashFunction(key); | |||
1070 | } | |||
1071 | ||||
1072 | /* Finds the dictEntry reference by using pointer and pre-calculated hash. | |||
1073 | * oldkey is a dead pointer and should not be accessed. | |||
1074 | * the hash value should be provided using dictGetHash. | |||
1075 | * no string / key comparison is performed. | |||
1076 | * return value is the reference to the dictEntry if found, or NULL if not found. */ | |||
1077 | dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) { | |||
1078 | dictEntry *he, **heref; | |||
1079 | unsigned long idx, table; | |||
1080 | ||||
1081 | if (dictSize(d)((d)->ht[0].used+(d)->ht[1].used) == 0) return NULL((void*)0); /* dict is empty */ | |||
1082 | for (table = 0; table <= 1; table++) { | |||
1083 | idx = hash & d->ht[table].sizemask; | |||
1084 | heref = &d->ht[table].table[idx]; | |||
1085 | he = *heref; | |||
1086 | while(he) { | |||
1087 | if (oldptr==he->key) | |||
1088 | return heref; | |||
1089 | heref = &he->next; | |||
1090 | he = *heref; | |||
1091 | } | |||
1092 | if (!dictIsRehashing(d)((d)->rehashidx != -1)) return NULL((void*)0); | |||
1093 | } | |||
1094 | return NULL((void*)0); | |||
1095 | } | |||
1096 | ||||
1097 | /* ------------------------------- Debugging ---------------------------------*/ | |||
1098 | ||||
1099 | #define DICT_STATS_VECTLEN50 50 | |||
1100 | size_t _dictGetStatsHt(char *buf, size_t bufsize, dictht *ht, int tableid) { | |||
1101 | unsigned long i, slots = 0, chainlen, maxchainlen = 0; | |||
1102 | unsigned long totchainlen = 0; | |||
1103 | unsigned long clvector[DICT_STATS_VECTLEN50]; | |||
1104 | size_t l = 0; | |||
1105 | ||||
1106 | if (ht->used == 0) { | |||
1107 | return snprintf(buf,bufsize, | |||
1108 | "No stats available for empty dictionaries\n"); | |||
1109 | } | |||
1110 | ||||
1111 | /* Compute stats. */ | |||
1112 | for (i = 0; i < DICT_STATS_VECTLEN50; i++) clvector[i] = 0; | |||
1113 | for (i = 0; i < ht->size; i++) { | |||
1114 | dictEntry *he; | |||
1115 | ||||
1116 | if (ht->table[i] == NULL((void*)0)) { | |||
1117 | clvector[0]++; | |||
1118 | continue; | |||
1119 | } | |||
1120 | slots++; | |||
1121 | /* For each hash entry on this slot... */ | |||
1122 | chainlen = 0; | |||
1123 | he = ht->table[i]; | |||
1124 | while(he) { | |||
1125 | chainlen++; | |||
1126 | he = he->next; | |||
1127 | } | |||
1128 | clvector[(chainlen < DICT_STATS_VECTLEN50) ? chainlen : (DICT_STATS_VECTLEN50-1)]++; | |||
1129 | if (chainlen > maxchainlen) maxchainlen = chainlen; | |||
1130 | totchainlen += chainlen; | |||
1131 | } | |||
1132 | ||||
1133 | /* Generate human readable stats. */ | |||
1134 | l += snprintf(buf+l,bufsize-l, | |||
1135 | "Hash table %d stats (%s):\n" | |||
1136 | " table size: %lu\n" | |||
1137 | " number of elements: %lu\n" | |||
1138 | " different slots: %lu\n" | |||
1139 | " max chain length: %lu\n" | |||
1140 | " avg chain length (counted): %.02f\n" | |||
1141 | " avg chain length (computed): %.02f\n" | |||
1142 | " Chain length distribution:\n", | |||
1143 | tableid, (tableid == 0) ? "main hash table" : "rehashing target", | |||
1144 | ht->size, ht->used, slots, maxchainlen, | |||
1145 | (float)totchainlen/slots, (float)ht->used/slots); | |||
1146 | ||||
1147 | for (i = 0; i < DICT_STATS_VECTLEN50-1; i++) { | |||
1148 | if (clvector[i] == 0) continue; | |||
1149 | if (l >= bufsize) break; | |||
1150 | l += snprintf(buf+l,bufsize-l, | |||
1151 | " %s%ld: %ld (%.02f%%)\n", | |||
1152 | (i == DICT_STATS_VECTLEN50-1)?">= ":"", | |||
1153 | i, clvector[i], ((float)clvector[i]/ht->size)*100); | |||
1154 | } | |||
1155 | ||||
1156 | /* Unlike snprintf(), return the number of characters actually written. */ | |||
1157 | if (bufsize) buf[bufsize-1] = '\0'; | |||
1158 | return strlen(buf); | |||
1159 | } | |||
1160 | ||||
1161 | void dictGetStats(char *buf, size_t bufsize, dict *d) { | |||
1162 | size_t l; | |||
1163 | char *orig_buf = buf; | |||
1164 | size_t orig_bufsize = bufsize; | |||
1165 | ||||
1166 | l = _dictGetStatsHt(buf,bufsize,&d->ht[0],0); | |||
1167 | buf += l; | |||
1168 | bufsize -= l; | |||
1169 | if (dictIsRehashing(d)((d)->rehashidx != -1) && bufsize > 0) { | |||
1170 | _dictGetStatsHt(buf,bufsize,&d->ht[1],1); | |||
1171 | } | |||
1172 | /* Make sure there is a NULL term at the end. */ | |||
1173 | if (orig_bufsize) orig_buf[orig_bufsize-1] = '\0'; | |||
1174 | } | |||
1175 | ||||
1176 | /* ------------------------------- Benchmark ---------------------------------*/ | |||
1177 | ||||
1178 | #ifdef DICT_BENCHMARK_MAIN | |||
1179 | ||||
1180 | #include "sds.h" | |||
1181 | ||||
/* Benchmark dict type: hash an sds key with the default dict hash. */
uint64_t hashCallback(const void *key) {
    return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
}
1185 | ||||
1186 | int compareCallback(void *privdata, const void *key1, const void *key2) { | |||
1187 | int l1,l2; | |||
1188 | DICT_NOTUSED(privdata)((void) privdata); | |||
1189 | ||||
1190 | l1 = sdslen((sds)key1); | |||
1191 | l2 = sdslen((sds)key2); | |||
1192 | if (l1 != l2) return 0; | |||
1193 | return memcmp(key1, key2, l1) == 0; | |||
1194 | } | |||
1195 | ||||
/* Benchmark dict type: key destructor, releases the sds key. */
void freeCallback(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    sdsfree(val);
}
1201 | ||||
1202 | dictType BenchmarkDictType = { | |||
1203 | hashCallback, | |||
1204 | NULL((void*)0), | |||
1205 | NULL((void*)0), | |||
1206 | compareCallback, | |||
1207 | freeCallback, | |||
1208 | NULL((void*)0), | |||
1209 | NULL((void*)0) | |||
1210 | }; | |||
1211 | ||||
1212 | #define start_benchmark() start = timeInMilliseconds() | |||
1213 | #define end_benchmark(msg) do { \ | |||
1214 | elapsed = timeInMilliseconds()-start; \ | |||
1215 | printf(msg ": %ld items in %lld ms\n", count, elapsed); \ | |||
1216 | } while(0) | |||
1217 | ||||
1218 | /* dict-benchmark [count] */ | |||
1219 | int main(int argc, char **argv) { | |||
1220 | long j; | |||
1221 | long long start, elapsed; | |||
1222 | dict *dict = dictCreate(&BenchmarkDictType,NULL((void*)0)); | |||
1223 | long count = 0; | |||
1224 | ||||
1225 | if (argc == 2) { | |||
1226 | count = strtol(argv[1],NULL((void*)0),10); | |||
1227 | } else { | |||
1228 | count = 5000000; | |||
1229 | } | |||
1230 | ||||
1231 | start_benchmark(); | |||
1232 | for (j = 0; j < count; j++) { | |||
1233 | int retval = dictAdd(dict,sdsfromlonglong(j),(void*)j); | |||
1234 | assert(retval == DICT_OK)(__builtin_expect(!!((retval == 0)), 1)?(void)0 : (_serverAssert ("retval == DICT_OK","dict.c",1234),__builtin_unreachable())); | |||
1235 | } | |||
1236 | end_benchmark("Inserting"); | |||
1237 | assert((long)dictSize(dict) == count)(__builtin_expect(!!(((long)((dict)->ht[0].used+(dict)-> ht[1].used) == count)), 1)?(void)0 : (_serverAssert("(long)dictSize(dict) == count" ,"dict.c",1237),__builtin_unreachable())); | |||
1238 | ||||
1239 | /* Wait for rehashing. */ | |||
1240 | while (dictIsRehashing(dict)((dict)->rehashidx != -1)) { | |||
1241 | dictRehashMilliseconds(dict,100); | |||
1242 | } | |||
1243 | ||||
1244 | start_benchmark(); | |||
1245 | for (j = 0; j < count; j++) { | |||
1246 | sds key = sdsfromlonglong(j); | |||
1247 | dictEntry *de = dictFind(dict,key); | |||
1248 | assert(de != NULL)(__builtin_expect(!!((de != ((void*)0))), 1)?(void)0 : (_serverAssert ("de != NULL","dict.c",1248),__builtin_unreachable())); | |||
1249 | sdsfree(key); | |||
1250 | } | |||
1251 | end_benchmark("Linear access of existing elements"); | |||
1252 | ||||
1253 | start_benchmark(); | |||
1254 | for (j = 0; j < count; j++) { | |||
1255 | sds key = sdsfromlonglong(j); | |||
1256 | dictEntry *de = dictFind(dict,key); | |||
1257 | assert(de != NULL)(__builtin_expect(!!((de != ((void*)0))), 1)?(void)0 : (_serverAssert ("de != NULL","dict.c",1257),__builtin_unreachable())); | |||
1258 | sdsfree(key); | |||
1259 | } | |||
1260 | end_benchmark("Linear access of existing elements (2nd round)"); | |||
1261 | ||||
1262 | start_benchmark(); | |||
1263 | for (j = 0; j < count; j++) { | |||
1264 | sds key = sdsfromlonglong(rand() % count); | |||
1265 | dictEntry *de = dictFind(dict,key); | |||
1266 | assert(de != NULL)(__builtin_expect(!!((de != ((void*)0))), 1)?(void)0 : (_serverAssert ("de != NULL","dict.c",1266),__builtin_unreachable())); | |||
1267 | sdsfree(key); | |||
1268 | } | |||
1269 | end_benchmark("Random access of existing elements"); | |||
1270 | ||||
1271 | start_benchmark(); | |||
1272 | for (j = 0; j < count; j++) { | |||
1273 | dictEntry *de = dictGetRandomKey(dict); | |||
1274 | assert(de != NULL)(__builtin_expect(!!((de != ((void*)0))), 1)?(void)0 : (_serverAssert ("de != NULL","dict.c",1274),__builtin_unreachable())); | |||
1275 | } | |||
1276 | end_benchmark("Accessing random keys"); | |||
1277 | ||||
1278 | start_benchmark(); | |||
1279 | for (j = 0; j < count; j++) { | |||
1280 | sds key = sdsfromlonglong(rand() % count); | |||
1281 | key[0] = 'X'; | |||
1282 | dictEntry *de = dictFind(dict,key); | |||
1283 | assert(de == NULL)(__builtin_expect(!!((de == ((void*)0))), 1)?(void)0 : (_serverAssert ("de == NULL","dict.c",1283),__builtin_unreachable())); | |||
1284 | sdsfree(key); | |||
1285 | } | |||
1286 | end_benchmark("Accessing missing"); | |||
1287 | ||||
1288 | start_benchmark(); | |||
1289 | for (j = 0; j < count; j++) { | |||
1290 | sds key = sdsfromlonglong(j); | |||
1291 | int retval = dictDelete(dict,key); | |||
1292 | assert(retval == DICT_OK)(__builtin_expect(!!((retval == 0)), 1)?(void)0 : (_serverAssert ("retval == DICT_OK","dict.c",1292),__builtin_unreachable())); | |||
1293 | key[0] += 17; /* Change first number to letter. */ | |||
1294 | retval = dictAdd(dict,key,(void*)j); | |||
1295 | assert(retval == DICT_OK)(__builtin_expect(!!((retval == 0)), 1)?(void)0 : (_serverAssert ("retval == DICT_OK","dict.c",1295),__builtin_unreachable())); | |||
1296 | } | |||
1297 | end_benchmark("Removing and adding"); | |||
1298 | } | |||
1299 | #endif |