File: | src/rax.c |
Warning: | line 892, column 9 Access to field 'size' results in a dereference of a null pointer (loaded from variable 'h') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* Rax -- A radix tree implementation. | |||
2 | * | |||
3 | * Version 1.2 -- 7 February 2019 | |||
4 | * | |||
5 | * Copyright (c) 2017-2019, Salvatore Sanfilippo <antirez at gmail dot com> | |||
6 | * All rights reserved. | |||
7 | * | |||
8 | * Redistribution and use in source and binary forms, with or without | |||
9 | * modification, are permitted provided that the following conditions are met: | |||
10 | * | |||
11 | * * Redistributions of source code must retain the above copyright notice, | |||
12 | * this list of conditions and the following disclaimer. | |||
13 | * * Redistributions in binary form must reproduce the above copyright | |||
14 | * notice, this list of conditions and the following disclaimer in the | |||
15 | * documentation and/or other materials provided with the distribution. | |||
16 | * * Neither the name of Redis nor the names of its contributors may be used | |||
17 | * to endorse or promote products derived from this software without | |||
18 | * specific prior written permission. | |||
19 | * | |||
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
23 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
24 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
27 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
28 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
29 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
30 | * POSSIBILITY OF SUCH DAMAGE. | |||
31 | */ | |||
32 | ||||
33 | #include <stdlib.h> | |||
34 | #include <string.h> | |||
35 | #include <assert.h> | |||
36 | #include <stdio.h> | |||
37 | #include <errno(*__errno_location ()).h> | |||
38 | #include <math.h> | |||
39 | #include "rax.h" | |||
40 | ||||
41 | #ifndef RAX_MALLOC_INCLUDE"rax_malloc.h" | |||
42 | #define RAX_MALLOC_INCLUDE"rax_malloc.h" "rax_malloc.h" | |||
43 | #endif | |||
44 | ||||
45 | #include RAX_MALLOC_INCLUDE"rax_malloc.h" | |||
46 | ||||
47 | /* This is a special pointer that is guaranteed to never have the same value | |||
48 | * of a radix tree node. It's used in order to report "not found" error without | |||
49 | * requiring the function to have multiple return values. */ | |||
50 | void *raxNotFound = (void*)"rax-not-found-pointer"; | |||
51 | ||||
52 | /* -------------------------------- Debugging ------------------------------ */ | |||
53 | ||||
54 | void raxDebugShowNode(const char *msg, raxNode *n); | |||
55 | ||||
56 | /* Turn debugging messages on/off by compiling with RAX_DEBUG_MSG macro on. | |||
57 | * When RAX_DEBUG_MSG is defined by default Rax operations will emit a lot | |||
58 | * of debugging info to the standard output, however you can still turn | |||
59 | * debugging on/off in order to enable it only when you suspect there is an | |||
60 | * operation causing a bug using the function raxSetDebugMsg(). */ | |||
61 | #ifdef RAX_DEBUG_MSG | |||
62 | #define debugf(...) \ | |||
63 | if (raxDebugMsg) { \ | |||
64 | printf("%s:%s:%d:\t", __FILE__"rax.c", __func__, __LINE__64); \ | |||
65 | printf(__VA_ARGS__); \ | |||
66 | fflush(stdoutstdout); \ | |||
67 | } | |||
68 | ||||
69 | #define debugnode(msg,n) raxDebugShowNode(msg,n) | |||
70 | #else | |||
71 | #define debugf(...) | |||
72 | #define debugnode(msg,n) | |||
73 | #endif | |||
74 | ||||
75 | /* By default log debug info if RAX_DEBUG_MSG is defined. */ | |||
76 | static int raxDebugMsg = 1; | |||
77 | ||||
78 | /* When debug messages are enabled, turn them on/off dynamically. By | |||
79 | * default they are enabled. Set the state to 0 to disable, and 1 to | |||
80 | * re-enable. */ | |||
81 | void raxSetDebugMsg(int onoff) { | |||
82 | raxDebugMsg = onoff; | |||
83 | } | |||
84 | ||||
85 | /* ------------------------- raxStack functions -------------------------- | |||
86 | * The raxStack is a simple stack of pointers that is capable of switching | |||
87 | * from using a stack-allocated array to dynamic heap once a given number of | |||
88 | * items are reached. It is used in order to retain the list of parent nodes | |||
89 | * while walking the radix tree in order to implement certain operations that | |||
90 | * need to navigate the tree upward. | |||
91 | * ------------------------------------------------------------------------- */ | |||
92 | ||||
93 | /* Initialize the stack. */ | |||
94 | static inline void raxStackInit(raxStack *ts) { | |||
95 | ts->stack = ts->static_items; | |||
96 | ts->items = 0; | |||
97 | ts->maxitems = RAX_STACK_STATIC_ITEMS32; | |||
98 | ts->oom = 0; | |||
99 | } | |||
100 | ||||
101 | /* Push an item into the stack, returns 1 on success, 0 on out of memory. */ | |||
102 | static inline int raxStackPush(raxStack *ts, void *ptr) { | |||
103 | if (ts->items == ts->maxitems) { | |||
104 | if (ts->stack == ts->static_items) { | |||
105 | ts->stack = rax_malloczmalloc(sizeof(void*)*ts->maxitems*2); | |||
106 | if (ts->stack == NULL((void*)0)) { | |||
107 | ts->stack = ts->static_items; | |||
108 | ts->oom = 1; | |||
109 | errno(*__errno_location ()) = ENOMEM12; | |||
110 | return 0; | |||
111 | } | |||
112 | memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems); | |||
113 | } else { | |||
114 | void **newalloc = rax_realloczrealloc(ts->stack,sizeof(void*)*ts->maxitems*2); | |||
115 | if (newalloc == NULL((void*)0)) { | |||
116 | ts->oom = 1; | |||
117 | errno(*__errno_location ()) = ENOMEM12; | |||
118 | return 0; | |||
119 | } | |||
120 | ts->stack = newalloc; | |||
121 | } | |||
122 | ts->maxitems *= 2; | |||
123 | } | |||
124 | ts->stack[ts->items] = ptr; | |||
125 | ts->items++; | |||
126 | return 1; | |||
127 | } | |||
128 | ||||
129 | /* Pop an item from the stack, the function returns NULL if there are no | |||
130 | * items to pop. */ | |||
131 | static inline void *raxStackPop(raxStack *ts) { | |||
132 | if (ts->items == 0) return NULL((void*)0); | |||
133 | ts->items--; | |||
134 | return ts->stack[ts->items]; | |||
135 | } | |||
136 | ||||
137 | /* Return the stack item at the top of the stack without actually consuming | |||
138 | * it. */ | |||
139 | static inline void *raxStackPeek(raxStack *ts) { | |||
140 | if (ts->items == 0) return NULL((void*)0); | |||
141 | return ts->stack[ts->items-1]; | |||
142 | } | |||
143 | ||||
144 | /* Free the stack in case we used heap allocation. */ | |||
145 | static inline void raxStackFree(raxStack *ts) { | |||
146 | if (ts->stack != ts->static_items) rax_freezfree(ts->stack); | |||
147 | } | |||
148 | ||||
149 | /* ---------------------------------------------------------------------------- | |||
150 | * Radix tree implementation | |||
151 | * --------------------------------------------------------------------------*/ | |||
152 | ||||
153 | /* Return the padding needed in the characters section of a node having size | |||
154 | * 'nodesize'. The padding is needed to store the child pointers to aligned | |||
155 | * addresses. Note that we add 4 to the node size because the node has a four | |||
156 | * bytes header. */ | |||
157 | #define raxPadding(nodesize)((sizeof(void*)-((nodesize+4) % sizeof(void*))) & (sizeof (void*)-1)) ((sizeof(void*)-((nodesize+4) % sizeof(void*))) & (sizeof(void*)-1)) | |||
158 | ||||
159 | /* Return the pointer to the last child pointer in a node. For the compressed | |||
160 | * nodes this is the only child pointer. */ | |||
161 | #define raxNodeLastChildPtr(n)((raxNode**) ( ((char*)(n)) + ( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode *)*(n)->size)+ (((n)->iskey && !(n)->isnull) *sizeof(void*)) ) - sizeof(raxNode*) - (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) )) ((raxNode**) ( \ | |||
162 | ((char*)(n)) + \ | |||
163 | raxNodeCurrentLength(n)( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size +4) % sizeof(void*))) & (sizeof(void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ (((n)-> iskey && !(n)->isnull)*sizeof(void*)) ) - \ | |||
164 | sizeof(raxNode*) - \ | |||
165 | (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \ | |||
166 | )) | |||
167 | ||||
168 | /* Return the pointer to the first child pointer. */ | |||
169 | #define raxNodeFirstChildPtr(n)((raxNode**) ( (n)->data + (n)->size + ((sizeof(void*)- (((n)->size+4) % sizeof(void*))) & (sizeof(void*)-1))) ) ((raxNode**) ( \ | |||
170 | (n)->data + \ | |||
171 | (n)->size + \ | |||
172 | raxPadding((n)->size)((sizeof(void*)-(((n)->size+4) % sizeof(void*))) & (sizeof (void*)-1)))) | |||
173 | ||||
174 | /* Return the current total size of the node. Note that the second line | |||
175 | * computes the padding after the string of characters, needed in order to | |||
176 | * save pointers to aligned addresses. */ | |||
177 | #define raxNodeCurrentLength(n)( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size +4) % sizeof(void*))) & (sizeof(void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ (((n)-> iskey && !(n)->isnull)*sizeof(void*)) ) ( \ | |||
178 | sizeof(raxNode)+(n)->size+ \ | |||
179 | raxPadding((n)->size)((sizeof(void*)-(((n)->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ \ | |||
180 | ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \ | |||
181 | (((n)->iskey && !(n)->isnull)*sizeof(void*)) \ | |||
182 | ) | |||
183 | ||||
184 | /* Allocate a new non compressed node with the specified number of children. | |||
185 | * If datafiled is true, the allocation is made large enough to hold the | |||
186 | * associated data pointer. | |||
187 | * Returns the new node pointer. On out of memory NULL is returned. */ | |||
188 | raxNode *raxNewNode(size_t children, int datafield) { | |||
189 | size_t nodesize = sizeof(raxNode)+children+raxPadding(children)((sizeof(void*)-((children+4) % sizeof(void*))) & (sizeof (void*)-1))+ | |||
190 | sizeof(raxNode*)*children; | |||
191 | if (datafield) nodesize += sizeof(void*); | |||
192 | raxNode *node = rax_malloczmalloc(nodesize); | |||
193 | if (node == NULL((void*)0)) return NULL((void*)0); | |||
194 | node->iskey = 0; | |||
195 | node->isnull = 0; | |||
196 | node->iscompr = 0; | |||
197 | node->size = children; | |||
198 | return node; | |||
199 | } | |||
200 | ||||
201 | /* Allocate a new rax and return its pointer. On out of memory the function | |||
202 | * returns NULL. */ | |||
203 | rax *raxNew(void) { | |||
204 | rax *rax = rax_malloczmalloc(sizeof(*rax)); | |||
205 | if (rax == NULL((void*)0)) return NULL((void*)0); | |||
206 | rax->numele = 0; | |||
207 | rax->numnodes = 1; | |||
208 | rax->head = raxNewNode(0,0); | |||
209 | if (rax->head == NULL((void*)0)) { | |||
210 | rax_freezfree(rax); | |||
211 | return NULL((void*)0); | |||
212 | } else { | |||
213 | return rax; | |||
214 | } | |||
215 | } | |||
216 | ||||
217 | /* realloc the node to make room for auxiliary data in order | |||
218 | * to store an item in that node. On out of memory NULL is returned. */ | |||
219 | raxNode *raxReallocForData(raxNode *n, void *data) { | |||
220 | if (data == NULL((void*)0)) return n; /* No reallocation needed, setting isnull=1 */ | |||
221 | size_t curlen = raxNodeCurrentLength(n)( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size +4) % sizeof(void*))) & (sizeof(void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ (((n)-> iskey && !(n)->isnull)*sizeof(void*)) ); | |||
222 | return rax_realloczrealloc(n,curlen+sizeof(void*)); | |||
223 | } | |||
224 | ||||
225 | /* Set the node auxiliary data to the specified pointer. */ | |||
226 | void raxSetData(raxNode *n, void *data) { | |||
227 | n->iskey = 1; | |||
228 | if (data != NULL((void*)0)) { | |||
229 | n->isnull = 0; | |||
230 | void **ndata = (void**) | |||
231 | ((char*)n+raxNodeCurrentLength(n)( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size +4) % sizeof(void*))) & (sizeof(void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ (((n)-> iskey && !(n)->isnull)*sizeof(void*)) )-sizeof(void*)); | |||
232 | memcpy(ndata,&data,sizeof(data)); | |||
233 | } else { | |||
234 | n->isnull = 1; | |||
235 | } | |||
236 | } | |||
237 | ||||
238 | /* Get the node auxiliary data. */ | |||
239 | void *raxGetData(raxNode *n) { | |||
240 | if (n->isnull) return NULL((void*)0); | |||
241 | void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size +4) % sizeof(void*))) & (sizeof(void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ (((n)-> iskey && !(n)->isnull)*sizeof(void*)) )-sizeof(void*)); | |||
242 | void *data; | |||
243 | memcpy(&data,ndata,sizeof(data)); | |||
244 | return data; | |||
245 | } | |||
246 | ||||
247 | /* Add a new child to the node 'n' representing the character 'c' and return | |||
248 | * its new pointer, as well as the child pointer by reference. Additionally | |||
249 | * '***parentlink' is populated with the raxNode pointer-to-pointer of where | |||
250 | * the new child was stored, which is useful for the caller to replace the | |||
251 | * child pointer if it gets reallocated. | |||
252 | * | |||
253 | * On success the new parent node pointer is returned (it may change because | |||
254 | * of the realloc, so the caller should discard 'n' and use the new value). | |||
255 | * On out of memory NULL is returned, and the old node is still valid. */ | |||
256 | raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) { | |||
257 | assert(n->iscompr == 0)((n->iscompr == 0) ? (void) (0) : __assert_fail ("n->iscompr == 0" , "rax.c", 257, __extension__ __PRETTY_FUNCTION__)); | |||
258 | ||||
259 | size_t curlen = raxNodeCurrentLength(n)( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size +4) % sizeof(void*))) & (sizeof(void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ (((n)-> iskey && !(n)->isnull)*sizeof(void*)) ); | |||
260 | n->size++; | |||
261 | size_t newlen = raxNodeCurrentLength(n)( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size +4) % sizeof(void*))) & (sizeof(void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ (((n)-> iskey && !(n)->isnull)*sizeof(void*)) ); | |||
262 | n->size--; /* For now restore the orignal size. We'll update it only on | |||
263 | success at the end. */ | |||
264 | ||||
265 | /* Alloc the new child we will link to 'n'. */ | |||
266 | raxNode *child = raxNewNode(0,0); | |||
267 | if (child == NULL((void*)0)) return NULL((void*)0); | |||
268 | ||||
269 | /* Make space in the original node. */ | |||
270 | raxNode *newn = rax_realloczrealloc(n,newlen); | |||
271 | if (newn == NULL((void*)0)) { | |||
272 | rax_freezfree(child); | |||
273 | return NULL((void*)0); | |||
274 | } | |||
275 | n = newn; | |||
276 | ||||
277 | /* After the reallocation, we have up to 8/16 (depending on the system | |||
278 | * pointer size, and the required node padding) bytes at the end, that is, | |||
279 | * the additional char in the 'data' section, plus one pointer to the new | |||
280 | * child, plus the padding needed in order to store addresses into aligned | |||
281 | * locations. | |||
282 | * | |||
283 | * So if we start with the following node, having "abde" edges. | |||
284 | * | |||
285 | * Note: | |||
286 | * - We assume 4 bytes pointer for simplicity. | |||
287 | * - Each space below corresponds to one byte | |||
288 | * | |||
289 | * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP| | |||
290 | * | |||
291 | * After the reallocation we need: 1 byte for the new edge character | |||
292 | * plus 4 bytes for a new child pointer (assuming 32 bit machine). | |||
293 | * However after adding 1 byte to the edge char, the header + the edge | |||
294 | * characters are no longer aligned, so we also need 3 bytes of padding. | |||
295 | * In total the reallocation will add 1+4+3 bytes = 8 bytes: | |||
296 | * | |||
297 | * (Blank bytes are represented by ".") | |||
298 | * | |||
299 | * [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|[....][....] | |||
300 | * | |||
301 | * Let's find where to insert the new child in order to make sure | |||
302 | * it is inserted in-place lexicographically. Assuming we are adding | |||
303 | * a child "c" in our case pos will be = 2 after the end of the following | |||
304 | * loop. */ | |||
305 | int pos; | |||
306 | for (pos = 0; pos < n->size; pos++) { | |||
307 | if (n->data[pos] > c) break; | |||
308 | } | |||
309 | ||||
310 | /* Now, if present, move auxiliary data pointer at the end | |||
311 | * so that we can mess with the other data without overwriting it. | |||
312 | * We will obtain something like that: | |||
313 | * | |||
314 | * [HDR*][abde][Aptr][Bptr][Dptr][Eptr][....][....]|AUXP| | |||
315 | */ | |||
316 | unsigned char *src, *dst; | |||
317 | if (n->iskey && !n->isnull) { | |||
318 | src = ((unsigned char*)n+curlen-sizeof(void*)); | |||
319 | dst = ((unsigned char*)n+newlen-sizeof(void*)); | |||
320 | memmove(dst,src,sizeof(void*)); | |||
321 | } | |||
322 | ||||
323 | /* Compute the "shift", that is, how many bytes we need to move the | |||
324 | * pointers section forward because of the addition of the new child | |||
325 | * byte in the string section. Note that if we had no padding, that | |||
326 | * would be always "1", since we are adding a single byte in the string | |||
327 | * section of the node (where now there is "abde" basically). | |||
328 | * | |||
329 | * However we have padding, so it could be zero, or up to 8. | |||
330 | * | |||
331 | * Another way to think at the shift is, how many bytes we need to | |||
332 | * move child pointers forward *other than* the obvious sizeof(void*) | |||
333 | * needed for the additional pointer itself. */ | |||
334 | size_t shift = newlen - curlen - sizeof(void*); | |||
335 | ||||
336 | /* We said we are adding a node with edge 'c'. The insertion | |||
337 | * point is between 'b' and 'd', so the 'pos' variable value is | |||
338 | * the index of the first child pointer that we need to move forward | |||
339 | * to make space for our new pointer. | |||
340 | * | |||
341 | * To start, move all the child pointers after the insertion point | |||
342 | * of shift+sizeof(pointer) bytes on the right, to obtain: | |||
343 | * | |||
344 | * [HDR*][abde][Aptr][Bptr][....][....][Dptr][Eptr]|AUXP| | |||
345 | */ | |||
346 | src = n->data+n->size+ | |||
347 | raxPadding(n->size)((sizeof(void*)-((n->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ | |||
348 | sizeof(raxNode*)*pos; | |||
349 | memmove(src+shift+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos)); | |||
350 | ||||
351 | /* Move the pointers to the left of the insertion position as well. Often | |||
352 | * we don't need to do anything if there was already some padding to use. In | |||
353 | * that case the final destination of the pointers will be the same, however | |||
354 | * in our example there was no pre-existing padding, so we added one byte | |||
355 | * plus thre bytes of padding. After the next memmove() things will look | |||
356 | * like thata: | |||
357 | * | |||
358 | * [HDR*][abde][....][Aptr][Bptr][....][Dptr][Eptr]|AUXP| | |||
359 | */ | |||
360 | if (shift) { | |||
361 | src = (unsigned char*) raxNodeFirstChildPtr(n)((raxNode**) ( (n)->data + (n)->size + ((sizeof(void*)- (((n)->size+4) % sizeof(void*))) & (sizeof(void*)-1))) ); | |||
362 | memmove(src+shift,src,sizeof(raxNode*)*pos); | |||
363 | } | |||
364 | ||||
365 | /* Now make the space for the additional char in the data section, | |||
366 | * but also move the pointers before the insertion point to the right | |||
367 | * by shift bytes, in order to obtain the following: | |||
368 | * | |||
369 | * [HDR*][ab.d][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP| | |||
370 | */ | |||
371 | src = n->data+pos; | |||
372 | memmove(src+1,src,n->size-pos); | |||
373 | ||||
374 | /* We can now set the character and its child node pointer to get: | |||
375 | * | |||
376 | * [HDR*][abcd][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP| | |||
377 | * [HDR*][abcd][e...][Aptr][Bptr][Cptr][Dptr][Eptr]|AUXP| | |||
378 | */ | |||
379 | n->data[pos] = c; | |||
380 | n->size++; | |||
381 | src = (unsigned char*) raxNodeFirstChildPtr(n)((raxNode**) ( (n)->data + (n)->size + ((sizeof(void*)- (((n)->size+4) % sizeof(void*))) & (sizeof(void*)-1))) ); | |||
382 | raxNode **childfield = (raxNode**)(src+sizeof(raxNode*)*pos); | |||
383 | memcpy(childfield,&child,sizeof(child)); | |||
384 | *childptr = child; | |||
385 | *parentlink = childfield; | |||
386 | return n; | |||
387 | } | |||
388 | ||||
389 | /* Turn the node 'n', that must be a node without any children, into a | |||
390 | * compressed node representing a set of nodes linked one after the other | |||
391 | * and having exactly one child each. The node can be a key or not: this | |||
392 | * property and the associated value if any will be preserved. | |||
393 | * | |||
394 | * The function also returns a child node, since the last node of the | |||
395 | * compressed chain cannot be part of the chain: it has zero children while | |||
396 | * we can only compress inner nodes with exactly one child each. */ | |||
397 | raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) { | |||
398 | assert(n->size == 0 && n->iscompr == 0)((n->size == 0 && n->iscompr == 0) ? (void) (0) : __assert_fail ("n->size == 0 && n->iscompr == 0" , "rax.c", 398, __extension__ __PRETTY_FUNCTION__)); | |||
399 | void *data = NULL((void*)0); /* Initialized only to avoid warnings. */ | |||
400 | size_t newsize; | |||
401 | ||||
402 | debugf("Compress node: %.*s\n", (int)len,s); | |||
403 | ||||
404 | /* Allocate the child to link to this node. */ | |||
405 | *child = raxNewNode(0,0); | |||
406 | if (*child == NULL((void*)0)) return NULL((void*)0); | |||
407 | ||||
408 | /* Make space in the parent node. */ | |||
409 | newsize = sizeof(raxNode)+len+raxPadding(len)((sizeof(void*)-((len+4) % sizeof(void*))) & (sizeof(void *)-1))+sizeof(raxNode*); | |||
410 | if (n->iskey) { | |||
411 | data = raxGetData(n); /* To restore it later. */ | |||
412 | if (!n->isnull) newsize += sizeof(void*); | |||
413 | } | |||
414 | raxNode *newn = rax_realloczrealloc(n,newsize); | |||
415 | if (newn == NULL((void*)0)) { | |||
416 | rax_freezfree(*child); | |||
417 | return NULL((void*)0); | |||
418 | } | |||
419 | n = newn; | |||
420 | ||||
421 | n->iscompr = 1; | |||
422 | n->size = len; | |||
423 | memcpy(n->data,s,len); | |||
424 | if (n->iskey) raxSetData(n,data); | |||
425 | raxNode **childfield = raxNodeLastChildPtr(n)((raxNode**) ( ((char*)(n)) + ( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode *)*(n)->size)+ (((n)->iskey && !(n)->isnull) *sizeof(void*)) ) - sizeof(raxNode*) - (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) )); | |||
426 | memcpy(childfield,child,sizeof(*child)); | |||
427 | return n; | |||
428 | } | |||
429 | ||||
430 | /* Low level function that walks the tree looking for the string | |||
431 | * 's' of 'len' bytes. The function returns the number of characters | |||
432 | * of the key that was possible to process: if the returned integer | |||
433 | * is the same as 'len', then it means that the node corresponding to the | |||
434 | * string was found (however it may not be a key in case the node->iskey is | |||
435 | * zero or if simply we stopped in the middle of a compressed node, so that | |||
436 | * 'splitpos' is non zero). | |||
437 | * | |||
438 | * Otherwise if the returned integer is not the same as 'len', there was an | |||
439 | * early stop during the tree walk because of a character mismatch. | |||
440 | * | |||
441 | * The node where the search ended (because the full string was processed | |||
442 | * or because there was an early stop) is returned by reference as | |||
443 | * '*stopnode' if the passed pointer is not NULL. This node link in the | |||
444 | * parent's node is returned as '*plink' if not NULL. Finally, if the | |||
445 | * search stopped in a compressed node, '*splitpos' returns the index | |||
446 | * inside the compressed node where the search ended. This is useful to | |||
447 | * know where to split the node for insertion. | |||
448 | * | |||
449 | * Note that when we stop in the middle of a compressed node with | |||
450 | * a perfect match, this function will return a length equal to the | |||
451 | * 'len' argument (all the key matched), and will return a *splitpos which is | |||
452 | * always positive (that will represent the index of the character immediately | |||
453 | * *after* the last match in the current compressed node). | |||
454 | * | |||
455 | * When instead we stop at a compressed node and *splitpos is zero, it | |||
456 | * means that the current node represents the key (that is, none of the | |||
457 | * compressed node characters are needed to represent the key, just all | |||
458 | * its parents nodes). */ | |||
459 | static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) { | |||
460 | raxNode *h = rax->head; | |||
461 | raxNode **parentlink = &rax->head; | |||
462 | ||||
463 | size_t i = 0; /* Position in the string. */ | |||
464 | size_t j = 0; /* Position in the node children (or bytes if compressed).*/ | |||
465 | while(h->size && i < len) { | |||
466 | debugnode("Lookup current node",h); | |||
467 | unsigned char *v = h->data; | |||
468 | ||||
469 | if (h->iscompr) { | |||
470 | for (j = 0; j < h->size && i < len; j++, i++) { | |||
471 | if (v[j] != s[i]) break; | |||
472 | } | |||
473 | if (j != h->size) break; | |||
474 | } else { | |||
475 | /* Even when h->size is large, linear scan provides good | |||
476 | * performances compared to other approaches that are in theory | |||
477 | * more sounding, like performing a binary search. */ | |||
478 | for (j = 0; j < h->size; j++) { | |||
479 | if (v[j] == s[i]) break; | |||
480 | } | |||
481 | if (j == h->size) break; | |||
482 | i++; | |||
483 | } | |||
484 | ||||
485 | if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */ | |||
486 | raxNode **children = raxNodeFirstChildPtr(h)((raxNode**) ( (h)->data + (h)->size + ((sizeof(void*)- (((h)->size+4) % sizeof(void*))) & (sizeof(void*)-1))) ); | |||
487 | if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */ | |||
488 | memcpy(&h,children+j,sizeof(h)); | |||
489 | parentlink = children+j; | |||
490 | j = 0; /* If the new node is non compressed and we do not | |||
491 | iterate again (since i == len) set the split | |||
492 | position to 0 to signal this node represents | |||
493 | the searched key. */ | |||
494 | } | |||
495 | debugnode("Lookup stop node is",h); | |||
496 | if (stopnode) *stopnode = h; | |||
497 | if (plink) *plink = parentlink; | |||
498 | if (splitpos && h->iscompr) *splitpos = j; | |||
499 | return i; | |||
500 | } | |||
501 | ||||
502 | /* Insert the element 's' of size 'len', setting as auxiliary data | |||
503 | * the pointer 'data'. If the element is already present, the associated | |||
504 | * data is updated (only if 'overwrite' is set to 1), and 0 is returned, | |||
505 | * otherwise the element is inserted and 1 is returned. On out of memory the | |||
506 | * function returns 0 as well but sets errno to ENOMEM, otherwise errno will | |||
507 | * be set to 0. | |||
508 | */ | |||
509 | int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) { | |||
510 | size_t i; | |||
511 | int j = 0; /* Split position. If raxLowWalk() stops in a compressed | |||
512 | node, the index 'j' represents the char we stopped within the | |||
513 | compressed node, that is, the position where to split the | |||
514 | node for insertion. */ | |||
515 | raxNode *h, **parentlink; | |||
516 | ||||
517 | debugf("### Insert %.*s with value %p\n", (int)len, s, data); | |||
518 | i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL((void*)0)); | |||
519 | ||||
520 | /* If i == len we walked following the whole string. If we are not | |||
521 | * in the middle of a compressed node, the string is either already | |||
522 | * inserted or this middle node is currently not a key, but can represent | |||
523 | * our key. We have just to reallocate the node and make space for the | |||
524 | * data pointer. */ | |||
525 | if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) { | |||
| ||||
526 | debugf("### Insert: node representing key exists\n"); | |||
527 | /* Make space for the value pointer if needed. */ | |||
528 | if (!h->iskey || (h->isnull && overwrite)) { | |||
529 | h = raxReallocForData(h,data); | |||
530 | if (h) memcpy(parentlink,&h,sizeof(h)); | |||
531 | } | |||
532 | if (h == NULL((void*)0)) { | |||
533 | errno(*__errno_location ()) = ENOMEM12; | |||
534 | return 0; | |||
535 | } | |||
536 | ||||
537 | /* Update the existing key if there is already one. */ | |||
538 | if (h->iskey) { | |||
539 | if (old) *old = raxGetData(h); | |||
540 | if (overwrite) raxSetData(h,data); | |||
541 | errno(*__errno_location ()) = 0; | |||
542 | return 0; /* Element already exists. */ | |||
543 | } | |||
544 | ||||
545 | /* Otherwise set the node as a key. Note that raxSetData() | |||
546 | * will set h->iskey. */ | |||
547 | raxSetData(h,data); | |||
548 | rax->numele++; | |||
549 | return 1; /* Element inserted. */ | |||
550 | } | |||
551 | ||||
552 | /* If the node we stopped at is a compressed node, we need to | |||
553 | * split it before to continue. | |||
554 | * | |||
555 | * Splitting a compressed node have a few possible cases. | |||
556 | * Imagine that the node 'h' we are currently at is a compressed | |||
557 | * node containing the string "ANNIBALE" (it means that it represents | |||
558 | * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child | |||
559 | * pointer of this node pointing at the 'E' node, because remember that | |||
560 | * we have characters at the edges of the graph, not inside the nodes | |||
561 | * themselves. | |||
562 | * | |||
563 | * In order to show a real case imagine our node to also point to | |||
564 | * another compressed node, that finally points at the node without | |||
565 | * children, representing 'O': | |||
566 | * | |||
567 | * "ANNIBALE" -> "SCO" -> [] | |||
568 | * | |||
569 | * When inserting we may face the following cases. Note that all the cases | |||
570 | * require the insertion of a non compressed node with exactly two | |||
571 | * children, except for the last case which just requires splitting a | |||
572 | * compressed node. | |||
573 | * | |||
574 | * 1) Inserting "ANNIENTARE" | |||
575 | * | |||
576 | * |B| -> "ALE" -> "SCO" -> [] | |||
577 | * "ANNI" -> |-| | |||
578 | * |E| -> (... continue algo ...) "NTARE" -> [] | |||
579 | * | |||
580 | * 2) Inserting "ANNIBALI" | |||
581 | * | |||
582 | * |E| -> "SCO" -> [] | |||
583 | * "ANNIBAL" -> |-| | |||
584 | * |I| -> (... continue algo ...) [] | |||
585 | * | |||
586 | * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node) | |||
587 | * | |||
588 | * |N| -> "NIBALE" -> "SCO" -> [] | |||
589 | * |A| -> |-| | |||
590 | * |G| -> (... continue algo ...) |O| -> [] | |||
591 | * | |||
592 | * 4) Inserting "CIAO" | |||
593 | * | |||
594 | * |A| -> "NNIBALE" -> "SCO" -> [] | |||
595 | * |-| | |||
596 | * |C| -> (... continue algo ...) "IAO" -> [] | |||
597 | * | |||
598 | * 5) Inserting "ANNI" | |||
599 | * | |||
600 | * "ANNI" -> "BALE" -> "SCO" -> [] | |||
601 | * | |||
602 | * The final algorithm for insertion covering all the above cases is as | |||
603 | * follows. | |||
604 | * | |||
605 | * ============================= ALGO 1 ============================= | |||
606 | * | |||
607 | * For the above cases 1 to 4, that is, all cases where we stopped in | |||
608 | * the middle of a compressed node for a character mismatch, do: | |||
609 | * | |||
610 | * Let $SPLITPOS be the zero-based index at which, in the | |||
611 | * compressed node array of characters, we found the mismatching | |||
612 | * character. For example if the node contains "ANNIBALE" and we add | |||
613 | * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the | |||
614 | * mismatching character is found. | |||
615 | * | |||
616 | * 1. Save the current compressed node $NEXT pointer (the pointer to the | |||
617 | * child element, that is always present in compressed nodes). | |||
618 | * | |||
619 | * 2. Create "split node" having as child the non common letter | |||
620 | * at the compressed node. The other non common letter (at the key) | |||
621 | * will be added later as we continue the normal insertion algorithm | |||
622 | * at step "6". | |||
623 | * | |||
624 | * 3a. IF $SPLITPOS == 0: | |||
625 | * Replace the old node with the split node, by copying the auxiliary | |||
626 | * data if any. Fix parent's reference. Free old node eventually | |||
627 | * (we still need its data for the next steps of the algorithm). | |||
628 | * | |||
629 | * 3b. IF $SPLITPOS != 0: | |||
630 | * Trim the compressed node (reallocating it as well) in order to | |||
631 | * contain $splitpos characters. Change child pointer in order to link | |||
632 | * to the split node. If new compressed node len is just 1, set | |||
633 | * iscompr to 0 (layout is the same). Fix parent's reference. | |||
634 | * | |||
635 | * 4a. IF the postfix len (the length of the remaining string of the | |||
636 | * original compressed node after the split character) is non zero, | |||
637 | * create a "postfix node". If the postfix node has just one character | |||
638 | * set iscompr to 0, otherwise iscompr to 1. Set the postfix node | |||
639 | * child pointer to $NEXT. | |||
640 | * | |||
641 | * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer. | |||
642 | * | |||
643 | * 5. Set child[0] of split node to postfix node. | |||
644 | * | |||
645 | * 6. Set the split node as the current node, set current index at child[1] | |||
646 | * and continue insertion algorithm as usually. | |||
647 | * | |||
648 | * ============================= ALGO 2 ============================= | |||
649 | * | |||
650 | * For case 5, that is, if we stopped in the middle of a compressed | |||
651 | * node but no mismatch was found, do: | |||
652 | * | |||
653 | * Let $SPLITPOS be the zero-based index at which, in the | |||
654 | * compressed node array of characters, we stopped iterating because | |||
655 | * there were no more keys character to match. So in the example of | |||
656 | * the node "ANNIBALE", addig the string "ANNI", the $SPLITPOS is 4. | |||
657 | * | |||
658 | * 1. Save the current compressed node $NEXT pointer (the pointer to the | |||
659 | * child element, that is always present in compressed nodes). | |||
660 | * | |||
661 | * 2. Create a "postfix node" containing all the characters from $SPLITPOS | |||
662 | * to the end. Use $NEXT as the postfix node child pointer. | |||
663 | * If the postfix node length is 1, set iscompr to 0. | |||
664 | * Set the node as a key with the associated value of the new | |||
665 | * inserted key. | |||
666 | * | |||
667 | * 3. Trim the current node to contain the first $SPLITPOS characters. | |||
668 | * As usually if the new node length is just 1, set iscompr to 0. | |||
669 | * Take the iskey / associated value as it was in the orignal node. | |||
670 | * Fix the parent's reference. | |||
671 | * | |||
672 | * 4. Set the postfix node as the only child pointer of the trimmed | |||
673 | * node created at step 1. | |||
674 | */ | |||
675 | ||||
676 | /* ------------------------- ALGORITHM 1 --------------------------- */ | |||
677 | if (h->iscompr && i != len) { | |||
678 | debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n", | |||
679 | h->size, h->data, (void*)h); | |||
680 | debugf("Still to insert: %.*s\n", (int)(len-i), s+i); | |||
681 | debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]); | |||
682 | debugf("Other (key) letter is '%c'\n", s[i]); | |||
683 | ||||
684 | /* 1: Save next pointer. */ | |||
685 | raxNode **childfield = raxNodeLastChildPtr(h)((raxNode**) ( ((char*)(h)) + ( sizeof(raxNode)+(h)->size+ ((sizeof(void*)-(((h)->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ ((h)->iscompr ? sizeof(raxNode*) : sizeof(raxNode *)*(h)->size)+ (((h)->iskey && !(h)->isnull) *sizeof(void*)) ) - sizeof(raxNode*) - (((h)->iskey && !(h)->isnull) ? sizeof(void*) : 0) )); | |||
686 | raxNode *next; | |||
687 | memcpy(&next,childfield,sizeof(next)); | |||
688 | debugf("Next is %p\n", (void*)next); | |||
689 | debugf("iskey %d\n", h->iskey); | |||
690 | if (h->iskey) { | |||
691 | debugf("key value is %p\n", raxGetData(h)); | |||
692 | } | |||
693 | ||||
694 | /* Set the length of the additional nodes we will need. */ | |||
695 | size_t trimmedlen = j; | |||
696 | size_t postfixlen = h->size - j - 1; | |||
697 | int split_node_is_key = !trimmedlen && h->iskey && !h->isnull; | |||
698 | size_t nodesize; | |||
699 | ||||
700 | /* 2: Create the split node. Also allocate the other nodes we'll need | |||
701 | * ASAP, so that it will be simpler to handle OOM. */ | |||
702 | raxNode *splitnode = raxNewNode(1, split_node_is_key); | |||
703 | raxNode *trimmed = NULL((void*)0); | |||
704 | raxNode *postfix = NULL((void*)0); | |||
705 | ||||
706 | if (trimmedlen) { | |||
707 | nodesize = sizeof(raxNode)+trimmedlen+raxPadding(trimmedlen)((sizeof(void*)-((trimmedlen+4) % sizeof(void*))) & (sizeof (void*)-1))+ | |||
708 | sizeof(raxNode*); | |||
709 | if (h->iskey && !h->isnull) nodesize += sizeof(void*); | |||
710 | trimmed = rax_malloczmalloc(nodesize); | |||
711 | } | |||
712 | ||||
713 | if (postfixlen) { | |||
714 | nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)((sizeof(void*)-((postfixlen+4) % sizeof(void*))) & (sizeof (void*)-1))+ | |||
715 | sizeof(raxNode*); | |||
716 | postfix = rax_malloczmalloc(nodesize); | |||
717 | } | |||
718 | ||||
719 | /* OOM? Abort now that the tree is untouched. */ | |||
720 | if (splitnode == NULL((void*)0) || | |||
721 | (trimmedlen && trimmed == NULL((void*)0)) || | |||
722 | (postfixlen && postfix == NULL((void*)0))) | |||
723 | { | |||
724 | rax_freezfree(splitnode); | |||
725 | rax_freezfree(trimmed); | |||
726 | rax_freezfree(postfix); | |||
727 | errno(*__errno_location ()) = ENOMEM12; | |||
728 | return 0; | |||
729 | } | |||
730 | splitnode->data[0] = h->data[j]; | |||
731 | ||||
732 | if (j == 0) { | |||
733 | /* 3a: Replace the old node with the split node. */ | |||
734 | if (h->iskey) { | |||
735 | void *ndata = raxGetData(h); | |||
736 | raxSetData(splitnode,ndata); | |||
737 | } | |||
738 | memcpy(parentlink,&splitnode,sizeof(splitnode)); | |||
739 | } else { | |||
740 | /* 3b: Trim the compressed node. */ | |||
741 | trimmed->size = j; | |||
742 | memcpy(trimmed->data,h->data,j); | |||
743 | trimmed->iscompr = j > 1 ? 1 : 0; | |||
744 | trimmed->iskey = h->iskey; | |||
745 | trimmed->isnull = h->isnull; | |||
746 | if (h->iskey && !h->isnull) { | |||
747 | void *ndata = raxGetData(h); | |||
748 | raxSetData(trimmed,ndata); | |||
749 | } | |||
750 | raxNode **cp = raxNodeLastChildPtr(trimmed)((raxNode**) ( ((char*)(trimmed)) + ( sizeof(raxNode)+(trimmed )->size+ ((sizeof(void*)-(((trimmed)->size+4) % sizeof( void*))) & (sizeof(void*)-1))+ ((trimmed)->iscompr ? sizeof (raxNode*) : sizeof(raxNode*)*(trimmed)->size)+ (((trimmed )->iskey && !(trimmed)->isnull)*sizeof(void*)) ) - sizeof(raxNode*) - (((trimmed)->iskey && !(trimmed )->isnull) ? sizeof(void*) : 0) )); | |||
751 | memcpy(cp,&splitnode,sizeof(splitnode)); | |||
752 | memcpy(parentlink,&trimmed,sizeof(trimmed)); | |||
753 | parentlink = cp; /* Set parentlink to splitnode parent. */ | |||
754 | rax->numnodes++; | |||
755 | } | |||
756 | ||||
757 | /* 4: Create the postfix node: what remains of the original | |||
758 | * compressed node after the split. */ | |||
759 | if (postfixlen) { | |||
760 | /* 4a: create a postfix node. */ | |||
761 | postfix->iskey = 0; | |||
762 | postfix->isnull = 0; | |||
763 | postfix->size = postfixlen; | |||
764 | postfix->iscompr = postfixlen > 1; | |||
765 | memcpy(postfix->data,h->data+j+1,postfixlen); | |||
766 | raxNode **cp = raxNodeLastChildPtr(postfix)((raxNode**) ( ((char*)(postfix)) + ( sizeof(raxNode)+(postfix )->size+ ((sizeof(void*)-(((postfix)->size+4) % sizeof( void*))) & (sizeof(void*)-1))+ ((postfix)->iscompr ? sizeof (raxNode*) : sizeof(raxNode*)*(postfix)->size)+ (((postfix )->iskey && !(postfix)->isnull)*sizeof(void*)) ) - sizeof(raxNode*) - (((postfix)->iskey && !(postfix )->isnull) ? sizeof(void*) : 0) )); | |||
767 | memcpy(cp,&next,sizeof(next)); | |||
768 | rax->numnodes++; | |||
769 | } else { | |||
770 | /* 4b: just use next as postfix node. */ | |||
771 | postfix = next; | |||
772 | } | |||
773 | ||||
774 | /* 5: Set splitnode first child as the postfix node. */ | |||
775 | raxNode **splitchild = raxNodeLastChildPtr(splitnode)((raxNode**) ( ((char*)(splitnode)) + ( sizeof(raxNode)+(splitnode )->size+ ((sizeof(void*)-(((splitnode)->size+4) % sizeof (void*))) & (sizeof(void*)-1))+ ((splitnode)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(splitnode)->size)+ ( ((splitnode)->iskey && !(splitnode)->isnull)*sizeof (void*)) ) - sizeof(raxNode*) - (((splitnode)->iskey && !(splitnode)->isnull) ? sizeof(void*) : 0) )); | |||
776 | memcpy(splitchild,&postfix,sizeof(postfix)); | |||
777 | ||||
778 | /* 6. Continue insertion: this will cause the splitnode to | |||
779 | * get a new child (the non common character at the currently | |||
780 | * inserted key). */ | |||
781 | rax_freezfree(h); | |||
782 | h = splitnode; | |||
783 | } else if (h->iscompr
| |||
784 | /* ------------------------- ALGORITHM 2 --------------------------- */ | |||
785 | debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n", | |||
786 | h->size, h->data, (void*)h, j); | |||
787 | ||||
788 | /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */ | |||
789 | size_t postfixlen = h->size - j; | |||
790 | size_t nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)((sizeof(void*)-((postfixlen+4) % sizeof(void*))) & (sizeof (void*)-1))+ | |||
791 | sizeof(raxNode*); | |||
792 | if (data != NULL((void*)0)) nodesize += sizeof(void*); | |||
793 | raxNode *postfix = rax_malloczmalloc(nodesize); | |||
794 | ||||
795 | nodesize = sizeof(raxNode)+j+raxPadding(j)((sizeof(void*)-((j+4) % sizeof(void*))) & (sizeof(void*) -1))+sizeof(raxNode*); | |||
796 | if (h->iskey && !h->isnull) nodesize += sizeof(void*); | |||
797 | raxNode *trimmed = rax_malloczmalloc(nodesize); | |||
798 | ||||
799 | if (postfix == NULL((void*)0) || trimmed == NULL((void*)0)) { | |||
800 | rax_freezfree(postfix); | |||
801 | rax_freezfree(trimmed); | |||
802 | errno(*__errno_location ()) = ENOMEM12; | |||
803 | return 0; | |||
804 | } | |||
805 | ||||
806 | /* 1: Save next pointer. */ | |||
807 | raxNode **childfield = raxNodeLastChildPtr(h)((raxNode**) ( ((char*)(h)) + ( sizeof(raxNode)+(h)->size+ ((sizeof(void*)-(((h)->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ ((h)->iscompr ? sizeof(raxNode*) : sizeof(raxNode *)*(h)->size)+ (((h)->iskey && !(h)->isnull) *sizeof(void*)) ) - sizeof(raxNode*) - (((h)->iskey && !(h)->isnull) ? sizeof(void*) : 0) )); | |||
808 | raxNode *next; | |||
809 | memcpy(&next,childfield,sizeof(next)); | |||
810 | ||||
811 | /* 2: Create the postfix node. */ | |||
812 | postfix->size = postfixlen; | |||
813 | postfix->iscompr = postfixlen > 1; | |||
814 | postfix->iskey = 1; | |||
815 | postfix->isnull = 0; | |||
816 | memcpy(postfix->data,h->data+j,postfixlen); | |||
817 | raxSetData(postfix,data); | |||
818 | raxNode **cp = raxNodeLastChildPtr(postfix)((raxNode**) ( ((char*)(postfix)) + ( sizeof(raxNode)+(postfix )->size+ ((sizeof(void*)-(((postfix)->size+4) % sizeof( void*))) & (sizeof(void*)-1))+ ((postfix)->iscompr ? sizeof (raxNode*) : sizeof(raxNode*)*(postfix)->size)+ (((postfix )->iskey && !(postfix)->isnull)*sizeof(void*)) ) - sizeof(raxNode*) - (((postfix)->iskey && !(postfix )->isnull) ? sizeof(void*) : 0) )); | |||
819 | memcpy(cp,&next,sizeof(next)); | |||
820 | rax->numnodes++; | |||
821 | ||||
822 | /* 3: Trim the compressed node. */ | |||
823 | trimmed->size = j; | |||
824 | trimmed->iscompr = j > 1; | |||
825 | trimmed->iskey = 0; | |||
826 | trimmed->isnull = 0; | |||
827 | memcpy(trimmed->data,h->data,j); | |||
828 | memcpy(parentlink,&trimmed,sizeof(trimmed)); | |||
829 | if (h->iskey) { | |||
830 | void *aux = raxGetData(h); | |||
831 | raxSetData(trimmed,aux); | |||
832 | } | |||
833 | ||||
834 | /* Fix the trimmed node child pointer to point to | |||
835 | * the postfix node. */ | |||
836 | cp = raxNodeLastChildPtr(trimmed)((raxNode**) ( ((char*)(trimmed)) + ( sizeof(raxNode)+(trimmed )->size+ ((sizeof(void*)-(((trimmed)->size+4) % sizeof( void*))) & (sizeof(void*)-1))+ ((trimmed)->iscompr ? sizeof (raxNode*) : sizeof(raxNode*)*(trimmed)->size)+ (((trimmed )->iskey && !(trimmed)->isnull)*sizeof(void*)) ) - sizeof(raxNode*) - (((trimmed)->iskey && !(trimmed )->isnull) ? sizeof(void*) : 0) )); | |||
837 | memcpy(cp,&postfix,sizeof(postfix)); | |||
838 | ||||
839 | /* Finish! We don't need to continue with the insertion | |||
840 | * algorithm for ALGO 2. The key is already inserted. */ | |||
841 | rax->numele++; | |||
842 | rax_freezfree(h); | |||
843 | return 1; /* Key inserted. */ | |||
844 | } | |||
845 | ||||
846 | /* We walked the radix tree as far as we could, but still there are left | |||
847 | * chars in our string. We need to insert the missing nodes. */ | |||
848 | while(i < len) { | |||
849 | raxNode *child; | |||
850 | ||||
851 | /* If this node is going to have a single child, and there | |||
852 | * are other characters, so that that would result in a chain | |||
853 | * of single-childed nodes, turn it into a compressed node. */ | |||
854 | if (h->size == 0 && len-i > 1) { | |||
855 | debugf("Inserting compressed node\n"); | |||
856 | size_t comprsize = len-i; | |||
857 | if (comprsize > RAX_NODE_MAX_SIZE((1<<29)-1)) | |||
858 | comprsize = RAX_NODE_MAX_SIZE((1<<29)-1); | |||
859 | raxNode *newh = raxCompressNode(h,s+i,comprsize,&child); | |||
860 | if (newh == NULL((void*)0)) goto oom; | |||
861 | h = newh; | |||
862 | memcpy(parentlink,&h,sizeof(h)); | |||
863 | parentlink = raxNodeLastChildPtr(h)((raxNode**) ( ((char*)(h)) + ( sizeof(raxNode)+(h)->size+ ((sizeof(void*)-(((h)->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ ((h)->iscompr ? sizeof(raxNode*) : sizeof(raxNode *)*(h)->size)+ (((h)->iskey && !(h)->isnull) *sizeof(void*)) ) - sizeof(raxNode*) - (((h)->iskey && !(h)->isnull) ? sizeof(void*) : 0) )); | |||
864 | i += comprsize; | |||
865 | } else { | |||
866 | debugf("Inserting normal node\n"); | |||
867 | raxNode **new_parentlink; | |||
868 | raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink); | |||
869 | if (newh == NULL((void*)0)) goto oom; | |||
870 | h = newh; | |||
871 | memcpy(parentlink,&h,sizeof(h)); | |||
872 | parentlink = new_parentlink; | |||
873 | i++; | |||
874 | } | |||
875 | rax->numnodes++; | |||
876 | h = child; | |||
877 | } | |||
878 | raxNode *newh = raxReallocForData(h,data); | |||
879 | if (newh == NULL((void*)0)) goto oom; | |||
880 | h = newh; | |||
881 | if (!h->iskey) rax->numele++; | |||
882 | raxSetData(h,data); | |||
883 | memcpy(parentlink,&h,sizeof(h)); | |||
884 | return 1; /* Element inserted. */ | |||
885 | ||||
886 | oom: | |||
887 | /* This code path handles out of memory after part of the sub-tree was | |||
888 | * already modified. Set the node as a key, and then remove it. However we | |||
889 | * do that only if the node is a terminal node, otherwise if the OOM | |||
890 | * happened reallocating a node in the middle, we don't need to free | |||
891 | * anything. */ | |||
892 | if (h->size == 0) { | |||
| ||||
893 | h->isnull = 1; | |||
894 | h->iskey = 1; | |||
895 | rax->numele++; /* Compensate the next remove. */ | |||
896 | assert(raxRemove(rax,s,i,NULL) != 0)((raxRemove(rax,s,i,((void*)0)) != 0) ? (void) (0) : __assert_fail ("raxRemove(rax,s,i,NULL) != 0", "rax.c", 896, __extension__ __PRETTY_FUNCTION__)); | |||
897 | } | |||
898 | errno(*__errno_location ()) = ENOMEM12; | |||
899 | return 0; | |||
900 | } | |||
901 | ||||
902 | /* Overwriting insert. Just a wrapper for raxGenericInsert() that will | |||
903 | * update the element if there is already one for the same key. */ | |||
904 | int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { | |||
905 | return raxGenericInsert(rax,s,len,data,old,1); | |||
906 | } | |||
907 | ||||
908 | /* Non overwriting insert function: this if an element with the same key | |||
909 | * exists, the value is not updated and the function returns 0. | |||
910 | * This is a just a wrapper for raxGenericInsert(). */ | |||
911 | int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { | |||
912 | return raxGenericInsert(rax,s,len,data,old,0); | |||
913 | } | |||
914 | ||||
915 | /* Find a key in the rax, returns raxNotFound special void pointer value | |||
916 | * if the item was not found, otherwise the value associated with the | |||
917 | * item is returned. */ | |||
918 | void *raxFind(rax *rax, unsigned char *s, size_t len) { | |||
919 | raxNode *h; | |||
920 | ||||
921 | debugf("### Lookup: %.*s\n", (int)len, s); | |||
922 | int splitpos = 0; | |||
923 | size_t i = raxLowWalk(rax,s,len,&h,NULL((void*)0),&splitpos,NULL((void*)0)); | |||
924 | if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) | |||
925 | return raxNotFound; | |||
926 | return raxGetData(h); | |||
927 | } | |||
928 | ||||
929 | /* Return the memory address where the 'parent' node stores the specified | |||
930 | * 'child' pointer, so that the caller can update the pointer with another | |||
931 | * one if needed. The function assumes it will find a match, otherwise the | |||
932 | * operation is an undefined behavior (it will continue scanning the | |||
933 | * memory without any bound checking). */ | |||
934 | raxNode **raxFindParentLink(raxNode *parent, raxNode *child) { | |||
935 | raxNode **cp = raxNodeFirstChildPtr(parent)((raxNode**) ( (parent)->data + (parent)->size + ((sizeof (void*)-(((parent)->size+4) % sizeof(void*))) & (sizeof (void*)-1)))); | |||
936 | raxNode *c; | |||
937 | while(1) { | |||
938 | memcpy(&c,cp,sizeof(c)); | |||
939 | if (c == child) break; | |||
940 | cp++; | |||
941 | } | |||
942 | return cp; | |||
943 | } | |||
944 | ||||
945 | /* Low level child removal from node. The new node pointer (after the child | |||
946 | * removal) is returned. Note that this function does not fix the pointer | |||
947 | * of the parent node in its parent, so this task is up to the caller. | |||
948 | * The function never fails for out of memory. */ | |||
949 | raxNode *raxRemoveChild(raxNode *parent, raxNode *child) { | |||
950 | debugnode("raxRemoveChild before", parent); | |||
951 | /* If parent is a compressed node (having a single child, as for definition | |||
952 | * of the data structure), the removal of the child consists into turning | |||
953 | * it into a normal node without children. */ | |||
954 | if (parent->iscompr) { | |||
955 | void *data = NULL((void*)0); | |||
956 | if (parent->iskey) data = raxGetData(parent); | |||
957 | parent->isnull = 0; | |||
958 | parent->iscompr = 0; | |||
959 | parent->size = 0; | |||
960 | if (parent->iskey) raxSetData(parent,data); | |||
961 | debugnode("raxRemoveChild after", parent); | |||
962 | return parent; | |||
963 | } | |||
964 | ||||
965 | /* Otherwise we need to scan for the child pointer and memmove() | |||
966 | * accordingly. | |||
967 | * | |||
968 | * 1. To start we seek the first element in both the children | |||
969 | * pointers and edge bytes in the node. */ | |||
970 | raxNode **cp = raxNodeFirstChildPtr(parent)((raxNode**) ( (parent)->data + (parent)->size + ((sizeof (void*)-(((parent)->size+4) % sizeof(void*))) & (sizeof (void*)-1)))); | |||
971 | raxNode **c = cp; | |||
972 | unsigned char *e = parent->data; | |||
973 | ||||
974 | /* 2. Search the child pointer to remove inside the array of children | |||
975 | * pointers. */ | |||
976 | while(1) { | |||
977 | raxNode *aux; | |||
978 | memcpy(&aux,c,sizeof(aux)); | |||
979 | if (aux == child) break; | |||
980 | c++; | |||
981 | e++; | |||
982 | } | |||
983 | ||||
984 | /* 3. Remove the edge and the pointer by memmoving the remaining children | |||
985 | * pointer and edge bytes one position before. */ | |||
986 | int taillen = parent->size - (e - parent->data) - 1; | |||
987 | debugf("raxRemoveChild tail len: %d\n", taillen); | |||
988 | memmove(e,e+1,taillen); | |||
989 | ||||
990 | /* Compute the shift, that is the amount of bytes we should move our | |||
991 | * child pointers to the left, since the removal of one edge character | |||
992 | * and the corresponding padding change, may change the layout. | |||
993 | * We just check if in the old version of the node there was at the | |||
994 | * end just a single byte and all padding: in that case removing one char | |||
995 | * will remove a whole sizeof(void*) word. */ | |||
996 | size_t shift = ((parent->size+4) % sizeof(void*)) == 1 ? sizeof(void*) : 0; | |||
997 | ||||
998 | /* Move the children pointers before the deletion point. */ | |||
999 | if (shift) | |||
1000 | memmove(((char*)cp)-shift,cp,(parent->size-taillen-1)*sizeof(raxNode**)); | |||
1001 | ||||
1002 | /* Move the remaining "tail" pointers at the right position as well. */ | |||
1003 | size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void*) : 0; | |||
1004 | memmove(((char*)c)-shift,c+1,taillen*sizeof(raxNode**)+valuelen); | |||
1005 | ||||
1006 | /* 4. Update size. */ | |||
1007 | parent->size--; | |||
1008 | ||||
1009 | /* realloc the node according to the theoretical memory usage, to free | |||
1010 | * data if we are over-allocating right now. */ | |||
1011 | raxNode *newnode = rax_realloczrealloc(parent,raxNodeCurrentLength(parent)( sizeof(raxNode)+(parent)->size+ ((sizeof(void*)-(((parent )->size+4) % sizeof(void*))) & (sizeof(void*)-1))+ ((parent )->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(parent)-> size)+ (((parent)->iskey && !(parent)->isnull)* sizeof(void*)) )); | |||
1012 | if (newnode) { | |||
1013 | debugnode("raxRemoveChild after", newnode); | |||
1014 | } | |||
1015 | /* Note: if rax_realloc() fails we just return the old address, which | |||
1016 | * is valid. */ | |||
1017 | return newnode ? newnode : parent; | |||
1018 | } | |||
1019 | ||||
1020 | /* Remove the specified item. Returns 1 if the item was found and | |||
1021 | * deleted, 0 otherwise. */ | |||
1022 | int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { | |||
1023 | raxNode *h; | |||
1024 | raxStack ts; | |||
1025 | ||||
1026 | debugf("### Delete: %.*s\n", (int)len, s); | |||
1027 | raxStackInit(&ts); | |||
1028 | int splitpos = 0; | |||
1029 | size_t i = raxLowWalk(rax,s,len,&h,NULL((void*)0),&splitpos,&ts); | |||
1030 | if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) { | |||
1031 | raxStackFree(&ts); | |||
1032 | return 0; | |||
1033 | } | |||
1034 | if (old) *old = raxGetData(h); | |||
1035 | h->iskey = 0; | |||
1036 | rax->numele--; | |||
1037 | ||||
1038 | /* If this node has no children, the deletion needs to reclaim the | |||
1039 | * no longer used nodes. This is an iterative process that needs to | |||
1040 | * walk the three upward, deleting all the nodes with just one child | |||
1041 | * that are not keys, until the head of the rax is reached or the first | |||
1042 | * node with more than one child is found. */ | |||
1043 | ||||
1044 | int trycompress = 0; /* Will be set to 1 if we should try to optimize the | |||
1045 | tree resulting from the deletion. */ | |||
1046 | ||||
1047 | if (h->size == 0) { | |||
1048 | debugf("Key deleted in node without children. Cleanup needed.\n"); | |||
1049 | raxNode *child = NULL((void*)0); | |||
1050 | while(h != rax->head) { | |||
1051 | child = h; | |||
1052 | debugf("Freeing child %p [%.*s] key:%d\n", (void*)child, | |||
1053 | (int)child->size, (char*)child->data, child->iskey); | |||
1054 | rax_freezfree(child); | |||
1055 | rax->numnodes--; | |||
1056 | h = raxStackPop(&ts); | |||
1057 | /* If this node has more then one child, or actually holds | |||
1058 | * a key, stop here. */ | |||
1059 | if (h->iskey || (!h->iscompr && h->size != 1)) break; | |||
1060 | } | |||
1061 | if (child) { | |||
1062 | debugf("Unlinking child %p from parent %p\n", | |||
1063 | (void*)child, (void*)h); | |||
1064 | raxNode *new = raxRemoveChild(h,child); | |||
1065 | if (new != h) { | |||
1066 | raxNode *parent = raxStackPeek(&ts); | |||
1067 | raxNode **parentlink; | |||
1068 | if (parent == NULL((void*)0)) { | |||
1069 | parentlink = &rax->head; | |||
1070 | } else { | |||
1071 | parentlink = raxFindParentLink(parent,h); | |||
1072 | } | |||
1073 | memcpy(parentlink,&new,sizeof(new)); | |||
1074 | } | |||
1075 | ||||
1076 | /* If after the removal the node has just a single child | |||
1077 | * and is not a key, we need to try to compress it. */ | |||
1078 | if (new->size == 1 && new->iskey == 0) { | |||
1079 | trycompress = 1; | |||
1080 | h = new; | |||
1081 | } | |||
1082 | } | |||
1083 | } else if (h->size == 1) { | |||
1084 | /* If the node had just one child, after the removal of the key | |||
1085 | * further compression with adjacent nodes is potentially possible. */ | |||
1086 | trycompress = 1; | |||
1087 | } | |||
1088 | ||||
1089 | /* Don't try node compression if our nodes pointers stack is not | |||
1090 | * complete because of OOM while executing raxLowWalk() */ | |||
1091 | if (trycompress && ts.oom) trycompress = 0; | |||
1092 | ||||
1093 | /* Recompression: if trycompress is true, 'h' points to a radix tree node | |||
1094 | * that changed in a way that could allow to compress nodes in this | |||
1095 | * sub-branch. Compressed nodes represent chains of nodes that are not | |||
1096 | * keys and have a single child, so there are two deletion events that | |||
1097 | * may alter the tree so that further compression is needed: | |||
1098 | * | |||
1099 | * 1) A node with a single child was a key and now no longer is a key. | |||
1100 | * 2) A node with two children now has just one child. | |||
1101 | * | |||
1102 | * We try to navigate upward till there are other nodes that can be | |||
1103 | * compressed, when we reach the upper node which is not a key and has | |||
1104 | * a single child, we scan the chain of children to collect the | |||
1105 | * compressable part of the tree, and replace the current node with the | |||
1106 | * new one, fixing the child pointer to reference the first non | |||
1107 | * compressable node. | |||
1108 | * | |||
1109 | * Example of case "1". A tree stores the keys "FOO" = 1 and | |||
1110 | * "FOOBAR" = 2: | |||
1111 | * | |||
1112 | * | |||
1113 | * "FOO" -> "BAR" -> [] (2) | |||
1114 | * (1) | |||
1115 | * | |||
1116 | * After the removal of "FOO" the tree can be compressed as: | |||
1117 | * | |||
1118 | * "FOOBAR" -> [] (2) | |||
1119 | * | |||
1120 | * | |||
1121 | * Example of case "2". A tree stores the keys "FOOBAR" = 1 and | |||
1122 | * "FOOTER" = 2: | |||
1123 | * | |||
1124 | * |B| -> "AR" -> [] (1) | |||
1125 | * "FOO" -> |-| | |||
1126 | * |T| -> "ER" -> [] (2) | |||
1127 | * | |||
1128 | * After the removal of "FOOTER" the resulting tree is: | |||
1129 | * | |||
1130 | * "FOO" -> |B| -> "AR" -> [] (1) | |||
1131 | * | |||
1132 | * That can be compressed into: | |||
1133 | * | |||
1134 | * "FOOBAR" -> [] (1) | |||
1135 | */ | |||
1136 | if (trycompress) { | |||
1137 | debugf("After removing %.*s:\n", (int)len, s); | |||
1138 | debugnode("Compression may be needed",h); | |||
1139 | debugf("Seek start node\n"); | |||
1140 | ||||
1141 | /* Try to reach the upper node that is compressible. | |||
1142 | * At the end of the loop 'h' will point to the first node we | |||
1143 | * can try to compress and 'parent' to its parent. */ | |||
1144 | raxNode *parent; | |||
1145 | while(1) { | |||
1146 | parent = raxStackPop(&ts); | |||
1147 | if (!parent || parent->iskey || | |||
1148 | (!parent->iscompr && parent->size != 1)) break; | |||
1149 | h = parent; | |||
1150 | debugnode("Going up to",h); | |||
1151 | } | |||
1152 | raxNode *start = h; /* Compression starting node. */ | |||
1153 | ||||
1154 | /* Scan chain of nodes we can compress. */ | |||
1155 | size_t comprsize = h->size; | |||
1156 | int nodes = 1; | |||
1157 | while(h->size != 0) { | |||
1158 | raxNode **cp = raxNodeLastChildPtr(h)((raxNode**) ( ((char*)(h)) + ( sizeof(raxNode)+(h)->size+ ((sizeof(void*)-(((h)->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ ((h)->iscompr ? sizeof(raxNode*) : sizeof(raxNode *)*(h)->size)+ (((h)->iskey && !(h)->isnull) *sizeof(void*)) ) - sizeof(raxNode*) - (((h)->iskey && !(h)->isnull) ? sizeof(void*) : 0) )); | |||
1159 | memcpy(&h,cp,sizeof(h)); | |||
1160 | if (h->iskey || (!h->iscompr && h->size != 1)) break; | |||
1161 | /* Stop here if going to the next node would result into | |||
1162 | * a compressed node larger than h->size can hold. */ | |||
1163 | if (comprsize + h->size > RAX_NODE_MAX_SIZE((1<<29)-1)) break; | |||
1164 | nodes++; | |||
1165 | comprsize += h->size; | |||
1166 | } | |||
1167 | if (nodes > 1) { | |||
1168 | /* If we can compress, create the new node and populate it. */ | |||
1169 | size_t nodesize = | |||
1170 | sizeof(raxNode)+comprsize+raxPadding(comprsize)((sizeof(void*)-((comprsize+4) % sizeof(void*))) & (sizeof (void*)-1))+sizeof(raxNode*); | |||
1171 | raxNode *new = rax_malloczmalloc(nodesize); | |||
1172 | /* An out of memory here just means we cannot optimize this | |||
1173 | * node, but the tree is left in a consistent state. */ | |||
1174 | if (new == NULL((void*)0)) { | |||
1175 | raxStackFree(&ts); | |||
1176 | return 1; | |||
1177 | } | |||
1178 | new->iskey = 0; | |||
1179 | new->isnull = 0; | |||
1180 | new->iscompr = 1; | |||
1181 | new->size = comprsize; | |||
1182 | rax->numnodes++; | |||
1183 | ||||
1184 | /* Scan again, this time to populate the new node content and | |||
1185 | * to fix the new node child pointer. At the same time we free | |||
1186 | * all the nodes that we'll no longer use. */ | |||
1187 | comprsize = 0; | |||
1188 | h = start; | |||
1189 | while(h->size != 0) { | |||
1190 | memcpy(new->data+comprsize,h->data,h->size); | |||
1191 | comprsize += h->size; | |||
1192 | raxNode **cp = raxNodeLastChildPtr(h)((raxNode**) ( ((char*)(h)) + ( sizeof(raxNode)+(h)->size+ ((sizeof(void*)-(((h)->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ ((h)->iscompr ? sizeof(raxNode*) : sizeof(raxNode *)*(h)->size)+ (((h)->iskey && !(h)->isnull) *sizeof(void*)) ) - sizeof(raxNode*) - (((h)->iskey && !(h)->isnull) ? sizeof(void*) : 0) )); | |||
1193 | raxNode *tofree = h; | |||
1194 | memcpy(&h,cp,sizeof(h)); | |||
1195 | rax_freezfree(tofree); rax->numnodes--; | |||
1196 | if (h->iskey || (!h->iscompr && h->size != 1)) break; | |||
1197 | } | |||
1198 | debugnode("New node",new); | |||
1199 | ||||
1200 | /* Now 'h' points to the first node that we still need to use, | |||
1201 | * so our new node child pointer will point to it. */ | |||
1202 | raxNode **cp = raxNodeLastChildPtr(new)((raxNode**) ( ((char*)(new)) + ( sizeof(raxNode)+(new)->size + ((sizeof(void*)-(((new)->size+4) % sizeof(void*))) & (sizeof(void*)-1))+ ((new)->iscompr ? sizeof(raxNode*) : sizeof (raxNode*)*(new)->size)+ (((new)->iskey && !(new )->isnull)*sizeof(void*)) ) - sizeof(raxNode*) - (((new)-> iskey && !(new)->isnull) ? sizeof(void*) : 0) )); | |||
1203 | memcpy(cp,&h,sizeof(h)); | |||
1204 | ||||
1205 | /* Fix parent link. */ | |||
1206 | if (parent) { | |||
1207 | raxNode **parentlink = raxFindParentLink(parent,start); | |||
1208 | memcpy(parentlink,&new,sizeof(new)); | |||
1209 | } else { | |||
1210 | rax->head = new; | |||
1211 | } | |||
1212 | ||||
1213 | debugf("Compressed %d nodes, %d total bytes\n", | |||
1214 | nodes, (int)comprsize); | |||
1215 | } | |||
1216 | } | |||
1217 | raxStackFree(&ts); | |||
1218 | return 1; | |||
1219 | } | |||
1220 | ||||
1221 | /* This is the core of raxFree(): performs a depth-first scan of the | |||
1222 | * tree and releases all the nodes found. */ | |||
1223 | void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) { | |||
1224 | debugnode("free traversing",n); | |||
1225 | int numchildren = n->iscompr ? 1 : n->size; | |||
1226 | raxNode **cp = raxNodeLastChildPtr(n)((raxNode**) ( ((char*)(n)) + ( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode *)*(n)->size)+ (((n)->iskey && !(n)->isnull) *sizeof(void*)) ) - sizeof(raxNode*) - (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) )); | |||
1227 | while(numchildren--) { | |||
1228 | raxNode *child; | |||
1229 | memcpy(&child,cp,sizeof(child)); | |||
1230 | raxRecursiveFree(rax,child,free_callback); | |||
1231 | cp--; | |||
1232 | } | |||
1233 | debugnode("free depth-first",n); | |||
1234 | if (free_callback && n->iskey && !n->isnull) | |||
1235 | free_callback(raxGetData(n)); | |||
1236 | rax_freezfree(n); | |||
1237 | rax->numnodes--; | |||
1238 | } | |||
1239 | ||||
1240 | /* Free a whole radix tree, calling the specified callback in order to | |||
1241 | * free the auxiliary data. */ | |||
1242 | void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) { | |||
1243 | raxRecursiveFree(rax,rax->head,free_callback); | |||
1244 | assert(rax->numnodes == 0)((rax->numnodes == 0) ? (void) (0) : __assert_fail ("rax->numnodes == 0" , "rax.c", 1244, __extension__ __PRETTY_FUNCTION__)); | |||
1245 | rax_freezfree(rax); | |||
1246 | } | |||
1247 | ||||
1248 | /* Free a whole radix tree. */ | |||
1249 | void raxFree(rax *rax) { | |||
1250 | raxFreeWithCallback(rax,NULL((void*)0)); | |||
1251 | } | |||
1252 | ||||
1253 | /* ------------------------------- Iterator --------------------------------- */ | |||
1254 | ||||
1255 | /* Initialize a Rax iterator. This call should be performed a single time | |||
1256 | * to initialize the iterator, and must be followed by a raxSeek() call, | |||
1257 | * otherwise the raxPrev()/raxNext() functions will just return EOF. */ | |||
1258 | void raxStart(raxIterator *it, rax *rt) { | |||
1259 | it->flags = RAX_ITER_EOF(1<<1); /* No crash if the iterator is not seeked. */ | |||
1260 | it->rt = rt; | |||
1261 | it->key_len = 0; | |||
1262 | it->key = it->key_static_string; | |||
1263 | it->key_max = RAX_ITER_STATIC_LEN128; | |||
1264 | it->data = NULL((void*)0); | |||
1265 | it->node_cb = NULL((void*)0); | |||
1266 | raxStackInit(&it->stack); | |||
1267 | } | |||
1268 | ||||
1269 | /* Append characters at the current key string of the iterator 'it'. This | |||
1270 | * is a low level function used to implement the iterator, not callable by | |||
1271 | * the user. Returns 0 on out of memory, otherwise 1 is returned. */ | |||
1272 | int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) { | |||
1273 | if (it->key_max < it->key_len+len) { | |||
1274 | unsigned char *old = (it->key == it->key_static_string) ? NULL((void*)0) : | |||
1275 | it->key; | |||
1276 | size_t new_max = (it->key_len+len)*2; | |||
1277 | it->key = rax_realloczrealloc(old,new_max); | |||
1278 | if (it->key == NULL((void*)0)) { | |||
1279 | it->key = (!old) ? it->key_static_string : old; | |||
1280 | errno(*__errno_location ()) = ENOMEM12; | |||
1281 | return 0; | |||
1282 | } | |||
1283 | if (old == NULL((void*)0)) memcpy(it->key,it->key_static_string,it->key_len); | |||
1284 | it->key_max = new_max; | |||
1285 | } | |||
1286 | /* Use memmove since there could be an overlap between 's' and | |||
1287 | * it->key when we use the current key in order to re-seek. */ | |||
1288 | memmove(it->key+it->key_len,s,len); | |||
1289 | it->key_len += len; | |||
1290 | return 1; | |||
1291 | } | |||
1292 | ||||
1293 | /* Remove the specified number of chars from the right of the current | |||
1294 | * iterator key. */ | |||
1295 | void raxIteratorDelChars(raxIterator *it, size_t count) { | |||
1296 | it->key_len -= count; | |||
1297 | } | |||
1298 | ||||
1299 | /* Do an iteration step towards the next element. At the end of the step the | |||
1300 | * iterator key will represent the (new) current key. If it is not possible | |||
1301 | * to step in the specified direction since there are no longer elements, the | |||
1302 | * iterator is flagged with RAX_ITER_EOF. | |||
1303 | * | |||
1304 | * If 'noup' is true the function starts directly scanning for the next | |||
1305 | * lexicographically smaller children, and the current node is already assumed | |||
1306 | * to be the parent of the last key node, so the first operation to go back to | |||
1307 | * the parent will be skipped. This option is used by raxSeek() when | |||
1308 | * implementing seeking a non existing element with the ">" or "<" options: | |||
1309 | * the starting node is not a key in that particular case, so we start the scan | |||
1310 | * from a node that does not represent the key set. | |||
1311 | * | |||
1312 | * The function returns 1 on success or 0 on out of memory. */ | |||
1313 | int raxIteratorNextStep(raxIterator *it, int noup) { | |||
1314 | if (it->flags & RAX_ITER_EOF(1<<1)) { | |||
1315 | return 1; | |||
1316 | } else if (it->flags & RAX_ITER_JUST_SEEKED(1<<0)) { | |||
1317 | it->flags &= ~RAX_ITER_JUST_SEEKED(1<<0); | |||
1318 | return 1; | |||
1319 | } | |||
1320 | ||||
1321 | /* Save key len, stack items and the node where we are currently | |||
1322 | * so that on iterator EOF we can restore the current key and state. */ | |||
1323 | size_t orig_key_len = it->key_len; | |||
1324 | size_t orig_stack_items = it->stack.items; | |||
1325 | raxNode *orig_node = it->node; | |||
1326 | ||||
1327 | while(1) { | |||
1328 | int children = it->node->iscompr ? 1 : it->node->size; | |||
1329 | if (!noup && children) { | |||
1330 | debugf("GO DEEPER\n"); | |||
1331 | /* Seek the lexicographically smaller key in this subtree, which | |||
1332 | * is the first one found always going towards the first child | |||
1333 | * of every successive node. */ | |||
1334 | if (!raxStackPush(&it->stack,it->node)) return 0; | |||
1335 | raxNode **cp = raxNodeFirstChildPtr(it->node)((raxNode**) ( (it->node)->data + (it->node)->size + ((sizeof(void*)-(((it->node)->size+4) % sizeof(void* ))) & (sizeof(void*)-1)))); | |||
1336 | if (!raxIteratorAddChars(it,it->node->data, | |||
1337 | it->node->iscompr ? it->node->size : 1)) return 0; | |||
1338 | memcpy(&it->node,cp,sizeof(it->node)); | |||
1339 | /* Call the node callback if any, and replace the node pointer | |||
1340 | * if the callback returns true. */ | |||
1341 | if (it->node_cb && it->node_cb(&it->node)) | |||
1342 | memcpy(cp,&it->node,sizeof(it->node)); | |||
1343 | /* For "next" step, stop every time we find a key along the | |||
1344 | * way, since the key is lexicograhically smaller compared to | |||
1345 | * what follows in the sub-children. */ | |||
1346 | if (it->node->iskey) { | |||
1347 | it->data = raxGetData(it->node); | |||
1348 | return 1; | |||
1349 | } | |||
1350 | } else { | |||
1351 | /* If we finished exploring the previous sub-tree, switch to the | |||
1352 | * new one: go upper until a node is found where there are | |||
1353 | * children representing keys lexicographically greater than the | |||
1354 | * current key. */ | |||
1355 | while(1) { | |||
1356 | int old_noup = noup; | |||
1357 | ||||
1358 | /* Already on head? Can't go up, iteration finished. */ | |||
1359 | if (!noup && it->node == it->rt->head) { | |||
1360 | it->flags |= RAX_ITER_EOF(1<<1); | |||
1361 | it->stack.items = orig_stack_items; | |||
1362 | it->key_len = orig_key_len; | |||
1363 | it->node = orig_node; | |||
1364 | return 1; | |||
1365 | } | |||
1366 | /* If there are no children at the current node, try parent's | |||
1367 | * next child. */ | |||
1368 | unsigned char prevchild = it->key[it->key_len-1]; | |||
1369 | if (!noup) { | |||
1370 | it->node = raxStackPop(&it->stack); | |||
1371 | } else { | |||
1372 | noup = 0; | |||
1373 | } | |||
1374 | /* Adjust the current key to represent the node we are | |||
1375 | * at. */ | |||
1376 | int todel = it->node->iscompr ? it->node->size : 1; | |||
1377 | raxIteratorDelChars(it,todel); | |||
1378 | ||||
1379 | /* Try visiting the next child if there was at least one | |||
1380 | * additional child. */ | |||
1381 | if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) { | |||
1382 | raxNode **cp = raxNodeFirstChildPtr(it->node)((raxNode**) ( (it->node)->data + (it->node)->size + ((sizeof(void*)-(((it->node)->size+4) % sizeof(void* ))) & (sizeof(void*)-1)))); | |||
1383 | int i = 0; | |||
1384 | while (i < it->node->size) { | |||
1385 | debugf("SCAN NEXT %c\n", it->node->data[i]); | |||
1386 | if (it->node->data[i] > prevchild) break; | |||
1387 | i++; | |||
1388 | cp++; | |||
1389 | } | |||
1390 | if (i != it->node->size) { | |||
1391 | debugf("SCAN found a new node\n"); | |||
1392 | raxIteratorAddChars(it,it->node->data+i,1); | |||
1393 | if (!raxStackPush(&it->stack,it->node)) return 0; | |||
1394 | memcpy(&it->node,cp,sizeof(it->node)); | |||
1395 | /* Call the node callback if any, and replace the node | |||
1396 | * pointer if the callback returns true. */ | |||
1397 | if (it->node_cb && it->node_cb(&it->node)) | |||
1398 | memcpy(cp,&it->node,sizeof(it->node)); | |||
1399 | if (it->node->iskey) { | |||
1400 | it->data = raxGetData(it->node); | |||
1401 | return 1; | |||
1402 | } | |||
1403 | break; | |||
1404 | } | |||
1405 | } | |||
1406 | } | |||
1407 | } | |||
1408 | } | |||
1409 | } | |||
1410 | ||||
1411 | /* Seek the greatest key in the subtree at the current node. Return 0 on | |||
1412 | * out of memory, otherwise 1. This is an helper function for different | |||
1413 | * iteration functions below. */ | |||
1414 | int raxSeekGreatest(raxIterator *it) { | |||
1415 | while(it->node->size) { | |||
1416 | if (it->node->iscompr) { | |||
1417 | if (!raxIteratorAddChars(it,it->node->data, | |||
1418 | it->node->size)) return 0; | |||
1419 | } else { | |||
1420 | if (!raxIteratorAddChars(it,it->node->data+it->node->size-1,1)) | |||
1421 | return 0; | |||
1422 | } | |||
1423 | raxNode **cp = raxNodeLastChildPtr(it->node)((raxNode**) ( ((char*)(it->node)) + ( sizeof(raxNode)+(it ->node)->size+ ((sizeof(void*)-(((it->node)->size +4) % sizeof(void*))) & (sizeof(void*)-1))+ ((it->node )->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(it->node )->size)+ (((it->node)->iskey && !(it->node )->isnull)*sizeof(void*)) ) - sizeof(raxNode*) - (((it-> node)->iskey && !(it->node)->isnull) ? sizeof (void*) : 0) )); | |||
1424 | if (!raxStackPush(&it->stack,it->node)) return 0; | |||
1425 | memcpy(&it->node,cp,sizeof(it->node)); | |||
1426 | } | |||
1427 | return 1; | |||
1428 | } | |||
1429 | ||||
1430 | /* Like raxIteratorNextStep() but implements an iteration step moving | |||
1431 | * to the lexicographically previous element. The 'noup' option has a similar | |||
1432 | * effect to the one of raxIteratorNextStep(). */ | |||
1433 | int raxIteratorPrevStep(raxIterator *it, int noup) { | |||
1434 | if (it->flags & RAX_ITER_EOF(1<<1)) { | |||
1435 | return 1; | |||
1436 | } else if (it->flags & RAX_ITER_JUST_SEEKED(1<<0)) { | |||
1437 | it->flags &= ~RAX_ITER_JUST_SEEKED(1<<0); | |||
1438 | return 1; | |||
1439 | } | |||
1440 | ||||
1441 | /* Save key len, stack items and the node where we are currently | |||
1442 | * so that on iterator EOF we can restore the current key and state. */ | |||
1443 | size_t orig_key_len = it->key_len; | |||
1444 | size_t orig_stack_items = it->stack.items; | |||
1445 | raxNode *orig_node = it->node; | |||
1446 | ||||
1447 | while(1) { | |||
1448 | int old_noup = noup; | |||
1449 | ||||
1450 | /* Already on head? Can't go up, iteration finished. */ | |||
1451 | if (!noup && it->node == it->rt->head) { | |||
1452 | it->flags |= RAX_ITER_EOF(1<<1); | |||
1453 | it->stack.items = orig_stack_items; | |||
1454 | it->key_len = orig_key_len; | |||
1455 | it->node = orig_node; | |||
1456 | return 1; | |||
1457 | } | |||
1458 | ||||
1459 | unsigned char prevchild = it->key[it->key_len-1]; | |||
1460 | if (!noup) { | |||
1461 | it->node = raxStackPop(&it->stack); | |||
1462 | } else { | |||
1463 | noup = 0; | |||
1464 | } | |||
1465 | ||||
1466 | /* Adjust the current key to represent the node we are | |||
1467 | * at. */ | |||
1468 | int todel = it->node->iscompr ? it->node->size : 1; | |||
1469 | raxIteratorDelChars(it,todel); | |||
1470 | ||||
1471 | /* Try visiting the prev child if there is at least one | |||
1472 | * child. */ | |||
1473 | if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) { | |||
1474 | raxNode **cp = raxNodeLastChildPtr(it->node)((raxNode**) ( ((char*)(it->node)) + ( sizeof(raxNode)+(it ->node)->size+ ((sizeof(void*)-(((it->node)->size +4) % sizeof(void*))) & (sizeof(void*)-1))+ ((it->node )->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(it->node )->size)+ (((it->node)->iskey && !(it->node )->isnull)*sizeof(void*)) ) - sizeof(raxNode*) - (((it-> node)->iskey && !(it->node)->isnull) ? sizeof (void*) : 0) )); | |||
1475 | int i = it->node->size-1; | |||
1476 | while (i >= 0) { | |||
1477 | debugf("SCAN PREV %c\n", it->node->data[i]); | |||
1478 | if (it->node->data[i] < prevchild) break; | |||
1479 | i--; | |||
1480 | cp--; | |||
1481 | } | |||
1482 | /* If we found a new subtree to explore in this node, | |||
1483 | * go deeper following all the last children in order to | |||
1484 | * find the key lexicographically greater. */ | |||
1485 | if (i != -1) { | |||
1486 | debugf("SCAN found a new node\n"); | |||
1487 | /* Enter the node we just found. */ | |||
1488 | if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0; | |||
1489 | if (!raxStackPush(&it->stack,it->node)) return 0; | |||
1490 | memcpy(&it->node,cp,sizeof(it->node)); | |||
1491 | /* Seek sub-tree max. */ | |||
1492 | if (!raxSeekGreatest(it)) return 0; | |||
1493 | } | |||
1494 | } | |||
1495 | ||||
1496 | /* Return the key: this could be the key we found scanning a new | |||
1497 | * subtree, or if we did not find a new subtree to explore here, | |||
1498 | * before giving up with this node, check if it's a key itself. */ | |||
1499 | if (it->node->iskey) { | |||
1500 | it->data = raxGetData(it->node); | |||
1501 | return 1; | |||
1502 | } | |||
1503 | } | |||
1504 | } | |||
1505 | ||||
1506 | /* Seek an iterator at the specified element. | |||
1507 | * Return 0 if the seek failed for syntax error or out of memory. Otherwise | |||
1508 | * 1 is returned. When 0 is returned for out of memory, errno is set to | |||
1509 | * the ENOMEM value. */ | |||
1510 | int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) { | |||
1511 | int eq = 0, lt = 0, gt = 0, first = 0, last = 0; | |||
1512 | ||||
1513 | it->stack.items = 0; /* Just resetting. Initialized by raxStart(). */ | |||
1514 | it->flags |= RAX_ITER_JUST_SEEKED(1<<0); | |||
1515 | it->flags &= ~RAX_ITER_EOF(1<<1); | |||
1516 | it->key_len = 0; | |||
1517 | it->node = NULL((void*)0); | |||
1518 | ||||
1519 | /* Set flags according to the operator used to perform the seek. */ | |||
1520 | if (op[0] == '>') { | |||
1521 | gt = 1; | |||
1522 | if (op[1] == '=') eq = 1; | |||
1523 | } else if (op[0] == '<') { | |||
1524 | lt = 1; | |||
1525 | if (op[1] == '=') eq = 1; | |||
1526 | } else if (op[0] == '=') { | |||
1527 | eq = 1; | |||
1528 | } else if (op[0] == '^') { | |||
1529 | first = 1; | |||
1530 | } else if (op[0] == '$') { | |||
1531 | last = 1; | |||
1532 | } else { | |||
1533 | errno(*__errno_location ()) = 0; | |||
1534 | return 0; /* Error. */ | |||
1535 | } | |||
1536 | ||||
1537 | /* If there are no elements, set the EOF condition immediately and | |||
1538 | * return. */ | |||
1539 | if (it->rt->numele == 0) { | |||
1540 | it->flags |= RAX_ITER_EOF(1<<1); | |||
1541 | return 1; | |||
1542 | } | |||
1543 | ||||
1544 | if (first) { | |||
1545 | /* Seeking the first key greater or equal to the empty string | |||
1546 | * is equivalent to seeking the smaller key available. */ | |||
1547 | return raxSeek(it,">=",NULL((void*)0),0); | |||
1548 | } | |||
1549 | ||||
1550 | if (last) { | |||
1551 | /* Find the greatest key taking always the last child till a | |||
1552 | * final node is found. */ | |||
1553 | it->node = it->rt->head; | |||
1554 | if (!raxSeekGreatest(it)) return 0; | |||
1555 | assert(it->node->iskey)((it->node->iskey) ? (void) (0) : __assert_fail ("it->node->iskey" , "rax.c", 1555, __extension__ __PRETTY_FUNCTION__)); | |||
1556 | it->data = raxGetData(it->node); | |||
1557 | return 1; | |||
1558 | } | |||
1559 | ||||
1560 | /* We need to seek the specified key. What we do here is to actually | |||
1561 | * perform a lookup, and later invoke the prev/next key code that | |||
1562 | * we already use for iteration. */ | |||
1563 | int splitpos = 0; | |||
1564 | size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL((void*)0),&splitpos,&it->stack); | |||
1565 | ||||
1566 | /* Return OOM on incomplete stack info. */ | |||
1567 | if (it->stack.oom) return 0; | |||
1568 | ||||
1569 | if (eq && i == len && (!it->node->iscompr || splitpos == 0) && | |||
1570 | it->node->iskey) | |||
1571 | { | |||
1572 | /* We found our node, since the key matches and we have an | |||
1573 | * "equal" condition. */ | |||
1574 | if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */ | |||
1575 | it->data = raxGetData(it->node); | |||
1576 | } else if (lt || gt) { | |||
1577 | /* Exact key not found or eq flag not set. We have to set as current | |||
1578 | * key the one represented by the node we stopped at, and perform | |||
1579 | * a next/prev operation to seek. To reconstruct the key at this node | |||
1580 | * we start from the parent and go to the current node, accumulating | |||
1581 | * the characters found along the way. */ | |||
1582 | if (!raxStackPush(&it->stack,it->node)) return 0; | |||
1583 | for (size_t j = 1; j < it->stack.items; j++) { | |||
1584 | raxNode *parent = it->stack.stack[j-1]; | |||
1585 | raxNode *child = it->stack.stack[j]; | |||
1586 | if (parent->iscompr) { | |||
1587 | if (!raxIteratorAddChars(it,parent->data,parent->size)) | |||
1588 | return 0; | |||
1589 | } else { | |||
1590 | raxNode **cp = raxNodeFirstChildPtr(parent)((raxNode**) ( (parent)->data + (parent)->size + ((sizeof (void*)-(((parent)->size+4) % sizeof(void*))) & (sizeof (void*)-1)))); | |||
1591 | unsigned char *p = parent->data; | |||
1592 | while(1) { | |||
1593 | raxNode *aux; | |||
1594 | memcpy(&aux,cp,sizeof(aux)); | |||
1595 | if (aux == child) break; | |||
1596 | cp++; | |||
1597 | p++; | |||
1598 | } | |||
1599 | if (!raxIteratorAddChars(it,p,1)) return 0; | |||
1600 | } | |||
1601 | } | |||
1602 | raxStackPop(&it->stack); | |||
1603 | ||||
1604 | /* We need to set the iterator in the correct state to call next/prev | |||
1605 | * step in order to seek the desired element. */ | |||
1606 | debugf("After initial seek: i=%d len=%d key=%.*s\n", | |||
1607 | (int)i, (int)len, (int)it->key_len, it->key); | |||
1608 | if (i != len && !it->node->iscompr) { | |||
1609 | /* If we stopped in the middle of a normal node because of a | |||
1610 | * mismatch, add the mismatching character to the current key | |||
1611 | * and call the iterator with the 'noup' flag so that it will try | |||
1612 | * to seek the next/prev child in the current node directly based | |||
1613 | * on the mismatching character. */ | |||
1614 | if (!raxIteratorAddChars(it,ele+i,1)) return 0; | |||
1615 | debugf("Seek normal node on mismatch: %.*s\n", | |||
1616 | (int)it->key_len, (char*)it->key); | |||
1617 | ||||
1618 | it->flags &= ~RAX_ITER_JUST_SEEKED(1<<0); | |||
1619 | if (lt && !raxIteratorPrevStep(it,1)) return 0; | |||
1620 | if (gt && !raxIteratorNextStep(it,1)) return 0; | |||
1621 | it->flags |= RAX_ITER_JUST_SEEKED(1<<0); /* Ignore next call. */ | |||
1622 | } else if (i != len && it->node->iscompr) { | |||
1623 | debugf("Compressed mismatch: %.*s\n", | |||
1624 | (int)it->key_len, (char*)it->key); | |||
1625 | /* In case of a mismatch within a compressed node. */ | |||
1626 | int nodechar = it->node->data[splitpos]; | |||
1627 | int keychar = ele[i]; | |||
1628 | it->flags &= ~RAX_ITER_JUST_SEEKED(1<<0); | |||
1629 | if (gt) { | |||
1630 | /* If the key the compressed node represents is greater | |||
1631 | * than our seek element, continue forward, otherwise set the | |||
1632 | * state in order to go back to the next sub-tree. */ | |||
1633 | if (nodechar > keychar) { | |||
1634 | if (!raxIteratorNextStep(it,0)) return 0; | |||
1635 | } else { | |||
1636 | if (!raxIteratorAddChars(it,it->node->data,it->node->size)) | |||
1637 | return 0; | |||
1638 | if (!raxIteratorNextStep(it,1)) return 0; | |||
1639 | } | |||
1640 | } | |||
1641 | if (lt) { | |||
1642 | /* If the key the compressed node represents is smaller | |||
1643 | * than our seek element, seek the greater key in this | |||
1644 | * subtree, otherwise set the state in order to go back to | |||
1645 | * the previous sub-tree. */ | |||
1646 | if (nodechar < keychar) { | |||
1647 | if (!raxSeekGreatest(it)) return 0; | |||
1648 | it->data = raxGetData(it->node); | |||
1649 | } else { | |||
1650 | if (!raxIteratorAddChars(it,it->node->data,it->node->size)) | |||
1651 | return 0; | |||
1652 | if (!raxIteratorPrevStep(it,1)) return 0; | |||
1653 | } | |||
1654 | } | |||
1655 | it->flags |= RAX_ITER_JUST_SEEKED(1<<0); /* Ignore next call. */ | |||
1656 | } else { | |||
1657 | debugf("No mismatch: %.*s\n", | |||
1658 | (int)it->key_len, (char*)it->key); | |||
1659 | /* If there was no mismatch we are into a node representing the | |||
1660 | * key, (but which is not a key or the seek operator does not | |||
1661 | * include 'eq'), or we stopped in the middle of a compressed node | |||
1662 | * after processing all the key. Continue iterating as this was | |||
1663 | * a legitimate key we stopped at. */ | |||
1664 | it->flags &= ~RAX_ITER_JUST_SEEKED(1<<0); | |||
1665 | if (it->node->iscompr && it->node->iskey && splitpos && lt) { | |||
1666 | /* If we stopped in the middle of a compressed node with | |||
1667 | * perfect match, and the condition is to seek a key "<" than | |||
1668 | * the specified one, then if this node is a key it already | |||
1669 | * represents our match. For instance we may have nodes: | |||
1670 | * | |||
1671 | * "f" -> "oobar" = 1 -> "" = 2 | |||
1672 | * | |||
1673 | * Representing keys "f" = 1, "foobar" = 2. A seek for | |||
1674 | * the key < "foo" will stop in the middle of the "oobar" | |||
1675 | * node, but will be our match, representing the key "f". | |||
1676 | * | |||
1677 | * So in that case, we don't seek backward. */ | |||
1678 | it->data = raxGetData(it->node); | |||
1679 | } else { | |||
1680 | if (gt && !raxIteratorNextStep(it,0)) return 0; | |||
1681 | if (lt && !raxIteratorPrevStep(it,0)) return 0; | |||
1682 | } | |||
1683 | it->flags |= RAX_ITER_JUST_SEEKED(1<<0); /* Ignore next call. */ | |||
1684 | } | |||
1685 | } else { | |||
1686 | /* If we are here just eq was set but no match was found. */ | |||
1687 | it->flags |= RAX_ITER_EOF(1<<1); | |||
1688 | return 1; | |||
1689 | } | |||
1690 | return 1; | |||
1691 | } | |||
1692 | ||||
1693 | /* Go to the next element in the scope of the iterator 'it'. | |||
1694 | * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is | |||
1695 | * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */ | |||
1696 | int raxNext(raxIterator *it) { | |||
1697 | if (!raxIteratorNextStep(it,0)) { | |||
1698 | errno(*__errno_location ()) = ENOMEM12; | |||
1699 | return 0; | |||
1700 | } | |||
1701 | if (it->flags & RAX_ITER_EOF(1<<1)) { | |||
1702 | errno(*__errno_location ()) = 0; | |||
1703 | return 0; | |||
1704 | } | |||
1705 | return 1; | |||
1706 | } | |||
1707 | ||||
1708 | /* Go to the previous element in the scope of the iterator 'it'. | |||
1709 | * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is | |||
1710 | * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */ | |||
1711 | int raxPrev(raxIterator *it) { | |||
1712 | if (!raxIteratorPrevStep(it,0)) { | |||
1713 | errno(*__errno_location ()) = ENOMEM12; | |||
1714 | return 0; | |||
1715 | } | |||
1716 | if (it->flags & RAX_ITER_EOF(1<<1)) { | |||
1717 | errno(*__errno_location ()) = 0; | |||
1718 | return 0; | |||
1719 | } | |||
1720 | return 1; | |||
1721 | } | |||
1722 | ||||
1723 | /* Perform a random walk starting in the current position of the iterator. | |||
1724 | * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned | |||
1725 | * and the iterator is set to the node reached after doing a random walk | |||
1726 | * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed | |||
1727 | * using a random number of steps between 1 and two times the logarithm of | |||
1728 | * the number of elements. | |||
1729 | * | |||
1730 | * NOTE: if you use this function to generate random elements from the radix | |||
1731 | * tree, expect a disappointing distribution. A random walk produces good | |||
1732 | * random elements if the tree is not sparse, however in the case of a radix | |||
1733 | * tree certain keys will be reported much more often than others. At least | |||
1734 | * this function should be able to explore every possible element eventually. */ | |||
1735 | int raxRandomWalk(raxIterator *it, size_t steps) { | |||
1736 | if (it->rt->numele == 0) { | |||
1737 | it->flags |= RAX_ITER_EOF(1<<1); | |||
1738 | return 0; | |||
1739 | } | |||
1740 | ||||
1741 | if (steps == 0) { | |||
1742 | size_t fle = 1+floor(log(it->rt->numele)); | |||
1743 | fle *= 2; | |||
1744 | steps = 1 + rand() % fle; | |||
1745 | } | |||
1746 | ||||
1747 | raxNode *n = it->node; | |||
1748 | while(steps > 0 || !n->iskey) { | |||
1749 | int numchildren = n->iscompr ? 1 : n->size; | |||
1750 | int r = rand() % (numchildren+(n != it->rt->head)); | |||
1751 | ||||
1752 | if (r == numchildren) { | |||
1753 | /* Go up to parent. */ | |||
1754 | n = raxStackPop(&it->stack); | |||
1755 | int todel = n->iscompr ? n->size : 1; | |||
1756 | raxIteratorDelChars(it,todel); | |||
1757 | } else { | |||
1758 | /* Select a random child. */ | |||
1759 | if (n->iscompr) { | |||
1760 | if (!raxIteratorAddChars(it,n->data,n->size)) return 0; | |||
1761 | } else { | |||
1762 | if (!raxIteratorAddChars(it,n->data+r,1)) return 0; | |||
1763 | } | |||
1764 | raxNode **cp = raxNodeFirstChildPtr(n)((raxNode**) ( (n)->data + (n)->size + ((sizeof(void*)- (((n)->size+4) % sizeof(void*))) & (sizeof(void*)-1))) )+r; | |||
1765 | if (!raxStackPush(&it->stack,n)) return 0; | |||
1766 | memcpy(&n,cp,sizeof(n)); | |||
1767 | } | |||
1768 | if (n->iskey) steps--; | |||
1769 | } | |||
1770 | it->node = n; | |||
1771 | it->data = raxGetData(it->node); | |||
1772 | return 1; | |||
1773 | } | |||
1774 | ||||
1775 | /* Compare the key currently pointed by the iterator to the specified | |||
1776 | * key according to the specified operator. Returns 1 if the comparison is | |||
1777 | * true, otherwise 0 is returned. */ | |||
1778 | int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) { | |||
1779 | int eq = 0, lt = 0, gt = 0; | |||
1780 | ||||
1781 | if (op[0] == '=' || op[1] == '=') eq = 1; | |||
1782 | if (op[0] == '>') gt = 1; | |||
1783 | else if (op[0] == '<') lt = 1; | |||
1784 | else if (op[1] != '=') return 0; /* Syntax error. */ | |||
1785 | ||||
1786 | size_t minlen = key_len < iter->key_len ? key_len : iter->key_len; | |||
1787 | int cmp = memcmp(iter->key,key,minlen); | |||
1788 | ||||
1789 | /* Handle == */ | |||
1790 | if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len; | |||
1791 | ||||
1792 | /* Handle >, >=, <, <= */ | |||
1793 | if (cmp == 0) { | |||
1794 | /* Same prefix: longer wins. */ | |||
1795 | if (eq && key_len == iter->key_len) return 1; | |||
1796 | else if (lt) return iter->key_len < key_len; | |||
1797 | else if (gt) return iter->key_len > key_len; | |||
1798 | else return 0; /* Avoid warning, just 'eq' is handled before. */ | |||
1799 | } else if (cmp > 0) { | |||
1800 | return gt ? 1 : 0; | |||
1801 | } else /* (cmp < 0) */ { | |||
1802 | return lt ? 1 : 0; | |||
1803 | } | |||
1804 | } | |||
1805 | ||||
1806 | /* Free the iterator. */ | |||
1807 | void raxStop(raxIterator *it) { | |||
1808 | if (it->key != it->key_static_string) rax_freezfree(it->key); | |||
1809 | raxStackFree(&it->stack); | |||
1810 | } | |||
1811 | ||||
1812 | /* Return if the iterator is in an EOF state. This happens when raxSeek() | |||
1813 | * failed to seek an appropriate element, so that raxNext() or raxPrev() | |||
1814 | * will return zero, or when an EOF condition was reached while iterating | |||
1815 | * with raxNext() and raxPrev(). */ | |||
1816 | int raxEOF(raxIterator *it) { | |||
1817 | return it->flags & RAX_ITER_EOF(1<<1); | |||
1818 | } | |||
1819 | ||||
1820 | /* Return the number of elements inside the radix tree. */ | |||
1821 | uint64_t raxSize(rax *rax) { | |||
1822 | return rax->numele; | |||
1823 | } | |||
1824 | ||||
1825 | /* ----------------------------- Introspection ------------------------------ */ | |||
1826 | ||||
1827 | /* This function is mostly used for debugging and learning purposes. | |||
1828 | * It shows an ASCII representation of a tree on standard output, outline | |||
1829 | * all the nodes and the contained keys. | |||
1830 | * | |||
1831 | * The representation is as follow: | |||
1832 | * | |||
1833 | * "foobar" (compressed node) | |||
1834 | * [abc] (normal node with three children) | |||
1835 | * [abc]=0x12345678 (node is a key, pointing to value 0x12345678) | |||
1836 | * [] (a normal empty node) | |||
1837 | * | |||
1838 | * Children are represented in new indented lines, each children prefixed by | |||
1839 | * the "`-(x)" string, where "x" is the edge byte. | |||
1840 | * | |||
1841 | * [abc] | |||
1842 | * `-(a) "ladin" | |||
1843 | * `-(b) [kj] | |||
1844 | * `-(c) [] | |||
1845 | * | |||
1846 | * However when a node has a single child the following representation | |||
1847 | * is used instead: | |||
1848 | * | |||
1849 | * [abc] -> "ladin" -> [] | |||
1850 | */ | |||
1851 | ||||
1852 | /* The actual implementation of raxShow(). */ | |||
1853 | void raxRecursiveShow(int level, int lpad, raxNode *n) { | |||
1854 | char s = n->iscompr ? '"' : '['; | |||
1855 | char e = n->iscompr ? '"' : ']'; | |||
1856 | ||||
1857 | int numchars = printf("%c%.*s%c", s, n->size, n->data, e); | |||
1858 | if (n->iskey) { | |||
1859 | numchars += printf("=%p",raxGetData(n)); | |||
1860 | } | |||
1861 | ||||
1862 | int numchildren = n->iscompr ? 1 : n->size; | |||
1863 | /* Note that 7 and 4 magic constants are the string length | |||
1864 | * of " `-(x) " and " -> " respectively. */ | |||
1865 | if (level) { | |||
1866 | lpad += (numchildren > 1) ? 7 : 4; | |||
1867 | if (numchildren == 1) lpad += numchars; | |||
1868 | } | |||
1869 | raxNode **cp = raxNodeFirstChildPtr(n)((raxNode**) ( (n)->data + (n)->size + ((sizeof(void*)- (((n)->size+4) % sizeof(void*))) & (sizeof(void*)-1))) ); | |||
1870 | for (int i = 0; i < numchildren; i++) { | |||
1871 | char *branch = " `-(%c) "; | |||
1872 | if (numchildren > 1) { | |||
1873 | printf("\n"); | |||
1874 | for (int j = 0; j < lpad; j++) putchar(' '); | |||
1875 | printf(branch,n->data[i]); | |||
1876 | } else { | |||
1877 | printf(" -> "); | |||
1878 | } | |||
1879 | raxNode *child; | |||
1880 | memcpy(&child,cp,sizeof(child)); | |||
1881 | raxRecursiveShow(level+1,lpad,child); | |||
1882 | cp++; | |||
1883 | } | |||
1884 | } | |||
1885 | ||||
1886 | /* Show a tree, as outlined in the comment above. */ | |||
1887 | void raxShow(rax *rax) { | |||
1888 | raxRecursiveShow(0,0,rax->head); | |||
1889 | putchar('\n'); | |||
1890 | } | |||
1891 | ||||
1892 | /* Used by debugnode() macro to show info about a given node. */ | |||
1893 | void raxDebugShowNode(const char *msg, raxNode *n) { | |||
1894 | if (raxDebugMsg == 0) return; | |||
1895 | printf("%s: %p [%.*s] key:%u size:%u children:", | |||
1896 | msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size); | |||
1897 | int numcld = n->iscompr ? 1 : n->size; | |||
1898 | raxNode **cldptr = raxNodeLastChildPtr(n)((raxNode**) ( ((char*)(n)) + ( sizeof(raxNode)+(n)->size+ ((sizeof(void*)-(((n)->size+4) % sizeof(void*))) & (sizeof (void*)-1))+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode *)*(n)->size)+ (((n)->iskey && !(n)->isnull) *sizeof(void*)) ) - sizeof(raxNode*) - (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) )) - (numcld-1); | |||
1899 | while(numcld--) { | |||
1900 | raxNode *child; | |||
1901 | memcpy(&child,cldptr,sizeof(child)); | |||
1902 | cldptr++; | |||
1903 | printf("%p ", (void*)child); | |||
1904 | } | |||
1905 | printf("\n"); | |||
1906 | fflush(stdoutstdout); | |||
1907 | } | |||
1908 | ||||
1909 | /* Touch all the nodes of a tree returning a check sum. This is useful | |||
1910 | * in order to make Valgrind detect if there is something wrong while | |||
1911 | * reading the data structure. | |||
1912 | * | |||
1913 | * This function was used in order to identify Rax bugs after a big refactoring | |||
1914 | * using this technique: | |||
1915 | * | |||
1916 | * 1. The rax-test is executed using Valgrind, adding a printf() so that for | |||
1917 | * the fuzz tester we see what iteration in the loop we are in. | |||
1918 | * 2. After every modification of the radix tree made by the fuzz tester | |||
1919 | * in rax-test.c, we add a call to raxTouch(). | |||
1920 | * 3. Now as soon as an operation will corrupt the tree, raxTouch() will | |||
1921 | * detect it (via Valgrind) immediately. We can add more calls to narrow | |||
1922 | * the state. | |||
1923 | * 4. At this point a good idea is to enable Rax debugging messages immediately | |||
1924 | * before the moment the tree is corrupted, to see what happens. | |||
1925 | */ | |||
1926 | unsigned long raxTouch(raxNode *n) { | |||
1927 | debugf("Touching %p\n", (void*)n); | |||
1928 | unsigned long sum = 0; | |||
1929 | if (n->iskey) { | |||
1930 | sum += (unsigned long)raxGetData(n); | |||
1931 | } | |||
1932 | ||||
1933 | int numchildren = n->iscompr ? 1 : n->size; | |||
1934 | raxNode **cp = raxNodeFirstChildPtr(n)((raxNode**) ( (n)->data + (n)->size + ((sizeof(void*)- (((n)->size+4) % sizeof(void*))) & (sizeof(void*)-1))) ); | |||
1935 | int count = 0; | |||
1936 | for (int i = 0; i < numchildren; i++) { | |||
1937 | if (numchildren > 1) { | |||
1938 | sum += (long)n->data[i]; | |||
1939 | } | |||
1940 | raxNode *child; | |||
1941 | memcpy(&child,cp,sizeof(child)); | |||
1942 | if (child == (void*)0x65d1760) count++; | |||
1943 | if (count > 1) exit(1); | |||
1944 | sum += raxTouch(child); | |||
1945 | cp++; | |||
1946 | } | |||
1947 | return sum; | |||
1948 | } |