PostgreSQL Source Code  git master
radixtree.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * radixtree.h
4  * Template for adaptive radix tree.
5  *
6  * A template to generate an "adaptive radix tree", specialized for value
7  * types and for local/shared memory.
8  *
9  * The concept originates from the paper "The Adaptive Radix Tree: ARTful
10  * Indexing for Main-Memory Databases" by Viktor Leis, Alfons Kemper,
11  * and Thomas Neumann, 2013.
12  *
13  * Radix trees have some advantages over hash tables:
14  * - The keys are logically ordered, allowing efficient sorted iteration
15  * and range queries
16  * - Operations using keys that are lexicographically close together
17  * will have favorable memory locality
18  * - Memory use grows gradually rather than by doubling
19  * - The key does not need to be stored with the value, since the key
20  * is implicitly contained in the path to the value
21  *
22  * Some disadvantages are:
23  * - Point queries (along with insertion and deletion) are slower than
24  * a linear probing hash table as in simplehash.h
25  * - Memory usage varies by key distribution, so is difficult to predict
26  *
27  * A classic radix tree consists of nodes, each containing an array of
28  * pointers to child nodes. The size of the array is determined by the
29  * "span" of the tree, which is the number of bits of the key used to
30  * index into the array. For example, with a span of 6, a "chunk"
31  * of 6 bits is extracted from the key at each node traversal, and
32  * the arrays thus have a "fanout" of 2^6 or 64 entries. A large span
33  * allows a shorter tree, but requires larger arrays that may be mostly
34  * wasted space.
35  *
36  * The key idea of the adaptive radix tree is to choose different
37  * data structures based on the number of child nodes. A node will
38  * start out small when it is first populated, and when it is full,
39  * it is replaced by the next larger size. Conversely, when a node
40  * becomes mostly empty, it is replaced by the next smaller node. The
41  * bulk of the code complexity in this module stems from this dynamic
42  * switching. One mitigating factor is using a span of 8, since bytes
43  * are directly addressable.
44  *
45  * The ART paper mentions three ways to implement leaves:
46  *
47  * "- Single-value leaves: The values are stored using an addi-
48  * tional leaf node type which stores one value.
49  * - Multi-value leaves: The values are stored in one of four
50  * different leaf node types, which mirror the structure of
51  * inner nodes, but contain values instead of pointers.
52  * - Combined pointer/value slots: If values fit into point-
53  * ers, no separate node types are necessary. Instead, each
54  * pointer storage location in an inner node can either
55  * store a pointer or a value."
56  *
57  * We use a form of "combined pointer/value slots", as recommended. Values
58  * of size (if fixed at compile time) equal or smaller than the platform's
59  * pointer type are stored in the child slots of the last level node,
60  * while larger values are the same as "single-value" leaves above. This
61  * offers flexibility and efficiency. Variable-length types are currently
62  * treated as single-value leaves for simplicity, but future work may
63  * allow those to be stored in the child pointer arrays, when they're
64  * small enough.
65  *
66  * There are two other techniques described in the paper that are not
67  * implemented here:
68  * - path compression "...removes all inner nodes that have only a single child."
69  * - lazy path expansion "...inner nodes are only created if they are required
70  * to distinguish at least two leaf nodes."
71  *
72  * We do have a form of "poor man's path compression", however, enabled by
73  * only supporting unsigned integer keys (for now assumed to be 64-bit):
74  * A tree doesn't contain paths where the highest bytes of all keys are
75  * zero. That way, the tree's height adapts to the distribution of keys.
76  *
77  * To handle concurrency, we use a single reader-writer lock for the
78  * radix tree. If concurrent write operations are possible, the tree
79  * must be exclusively locked during write operations such as RT_SET()
80  * and RT_DELETE(), and share locked during read operations such as
81  * RT_FIND() and RT_BEGIN_ITERATE().
82  *
83  * TODO: The current locking mechanism is not optimized for high
84  * concurrency with mixed read-write workloads. In the future it might
85  * be worthwhile to replace it with the Optimistic Lock Coupling or
86  * ROWEX mentioned in the paper "The ART of Practical Synchronization"
87  * by the same authors as the ART paper, 2016.
88  *
89  * To generate a radix tree and associated functions for a use case
90  * several macros have to be #define'ed before this file is included.
91  * Including the file #undef's all those, so a new radix tree can be
92  * generated afterwards.
93  *
94  * The relevant parameters are:
95  * - RT_PREFIX - prefix for all symbol names generated. A prefix of "foo"
96  * will result in radix tree type "foo_radix_tree" and functions like
97  * "foo_create"/"foo_free" and so forth.
98  * - RT_DECLARE - if defined function prototypes and type declarations are
99  * generated
100  * - RT_DEFINE - if defined function definitions are generated
101  * - RT_SCOPE - in which scope (e.g. extern, static inline) do function
102  * declarations reside
103  * - RT_VALUE_TYPE - the type of the value.
104  * - RT_VARLEN_VALUE_SIZE() - for variable length values, an expression
105  * involving a pointer to the value type, to calculate size.
106  * NOTE: implies that the value is in fact variable-length,
107  * so do not set for fixed-length values.
108  * - RT_RUNTIME_EMBEDDABLE_VALUE - for variable length values, allows
109  * storing the value in a child pointer slot, rather than as a single-
110  * value leaf, if small enough. This requires that the value, when
111  * read as a child pointer, can be tagged in the lowest bit.
112  *
113  * Optional parameters:
114  * - RT_SHMEM - if defined, the radix tree is created in the DSA area
115  * so that multiple processes can access it simultaneously.
116  * - RT_DEBUG - if defined add stats tracking and debugging functions
117  *
118  * Interface
119  * ---------
120  *
121  * RT_CREATE - Create a new, empty radix tree
122  * RT_FREE - Free the radix tree
123  * RT_FIND - Lookup the value for a given key
124  * RT_SET - Set a key-value pair
125  * RT_BEGIN_ITERATE - Begin iterating through all key-value pairs
126  * RT_ITERATE_NEXT - Return next key-value pair, if any
127  * RT_END_ITERATE - End iteration
128  * RT_MEMORY_USAGE - Get the memory as measured by space in memory context blocks
129  *
130  * Interface for Shared Memory
131  * ---------
132  *
133  * RT_ATTACH - Attach to the radix tree
134  * RT_DETACH - Detach from the radix tree
135  * RT_LOCK_EXCLUSIVE - Lock the radix tree in exclusive mode
136  * RT_LOCK_SHARE - Lock the radix tree in share mode
137  * RT_UNLOCK - Unlock the radix tree
138  * RT_GET_HANDLE - Return the handle of the radix tree
139  *
140  * Optional Interface
141  * ---------
142  *
143  * RT_DELETE - Delete a key-value pair. Declared/defined if RT_USE_DELETE is defined
144  *
145  *
146  * Copyright (c) 2024, PostgreSQL Global Development Group
147  *
148  * IDENTIFICATION
149  * src/include/lib/radixtree.h
150  *
151  *-------------------------------------------------------------------------
152  */
153 
154 #include "postgres.h"
155 
156 #include "nodes/bitmapset.h"
157 #include "port/pg_bitutils.h"
158 #include "port/simd.h"
159 #include "utils/dsa.h"
160 #include "utils/memutils.h"
161 
162 /* helpers */
163 #define RT_MAKE_PREFIX(a) CppConcat(a,_)
164 #define RT_MAKE_NAME(name) RT_MAKE_NAME_(RT_MAKE_PREFIX(RT_PREFIX),name)
165 #define RT_MAKE_NAME_(a,b) CppConcat(a,b)
166 /*
167  * stringify a macro constant, from https://gcc.gnu.org/onlinedocs/cpp/Stringizing.html
168  */
169 #define RT_STR(s) RT_STR_(s)
170 #define RT_STR_(s) #s
171 
172 /* function declarations */
173 #define RT_CREATE RT_MAKE_NAME(create)
174 #define RT_FREE RT_MAKE_NAME(free)
175 #define RT_FIND RT_MAKE_NAME(find)
176 #ifdef RT_SHMEM
177 #define RT_ATTACH RT_MAKE_NAME(attach)
178 #define RT_DETACH RT_MAKE_NAME(detach)
179 #define RT_GET_HANDLE RT_MAKE_NAME(get_handle)
180 #define RT_LOCK_EXCLUSIVE RT_MAKE_NAME(lock_exclusive)
181 #define RT_LOCK_SHARE RT_MAKE_NAME(lock_share)
182 #define RT_UNLOCK RT_MAKE_NAME(unlock)
183 #endif
184 #define RT_SET RT_MAKE_NAME(set)
185 #define RT_BEGIN_ITERATE RT_MAKE_NAME(begin_iterate)
186 #define RT_ITERATE_NEXT RT_MAKE_NAME(iterate_next)
187 #define RT_END_ITERATE RT_MAKE_NAME(end_iterate)
188 #ifdef RT_USE_DELETE
189 #define RT_DELETE RT_MAKE_NAME(delete)
190 #endif
191 #define RT_MEMORY_USAGE RT_MAKE_NAME(memory_usage)
192 #define RT_DUMP_NODE RT_MAKE_NAME(dump_node)
193 #define RT_STATS RT_MAKE_NAME(stats)
194 
195 /* internal helper functions (no externally visible prototypes) */
196 #define RT_CHILDPTR_IS_VALUE RT_MAKE_NAME(childptr_is_value)
197 #define RT_VALUE_IS_EMBEDDABLE RT_MAKE_NAME(value_is_embeddable)
198 #define RT_GET_SLOT_RECURSIVE RT_MAKE_NAME(get_slot_recursive)
199 #define RT_DELETE_RECURSIVE RT_MAKE_NAME(delete_recursive)
200 #define RT_ALLOC_NODE RT_MAKE_NAME(alloc_node)
201 #define RT_ALLOC_LEAF RT_MAKE_NAME(alloc_leaf)
202 #define RT_FREE_NODE RT_MAKE_NAME(free_node)
203 #define RT_FREE_LEAF RT_MAKE_NAME(free_leaf)
204 #define RT_FREE_RECURSE RT_MAKE_NAME(free_recurse)
205 #define RT_EXTEND_UP RT_MAKE_NAME(extend_up)
206 #define RT_EXTEND_DOWN RT_MAKE_NAME(extend_down)
207 #define RT_COPY_COMMON RT_MAKE_NAME(copy_common)
208 #define RT_PTR_SET_LOCAL RT_MAKE_NAME(ptr_set_local)
209 #define RT_NODE_16_SEARCH_EQ RT_MAKE_NAME(node_16_search_eq)
210 #define RT_NODE_4_GET_INSERTPOS RT_MAKE_NAME(node_4_get_insertpos)
211 #define RT_NODE_16_GET_INSERTPOS RT_MAKE_NAME(node_16_get_insertpos)
212 #define RT_SHIFT_ARRAYS_FOR_INSERT RT_MAKE_NAME(shift_arrays_for_insert)
213 #define RT_SHIFT_ARRAYS_AND_DELETE RT_MAKE_NAME(shift_arrays_and_delete)
214 #define RT_COPY_ARRAYS_FOR_INSERT RT_MAKE_NAME(copy_arrays_for_insert)
215 #define RT_COPY_ARRAYS_AND_DELETE RT_MAKE_NAME(copy_arrays_and_delete)
216 #define RT_NODE_48_IS_CHUNK_USED RT_MAKE_NAME(node_48_is_chunk_used)
217 #define RT_NODE_48_GET_CHILD RT_MAKE_NAME(node_48_get_child)
218 #define RT_NODE_256_IS_CHUNK_USED RT_MAKE_NAME(node_256_is_chunk_used)
219 #define RT_NODE_256_GET_CHILD RT_MAKE_NAME(node_256_get_child)
220 #define RT_KEY_GET_SHIFT RT_MAKE_NAME(key_get_shift)
221 #define RT_SHIFT_GET_MAX_VAL RT_MAKE_NAME(shift_get_max_val)
222 #define RT_NODE_SEARCH RT_MAKE_NAME(node_search)
223 #define RT_NODE_DELETE RT_MAKE_NAME(node_delete)
224 #define RT_NODE_INSERT RT_MAKE_NAME(node_insert)
225 #define RT_ADD_CHILD_4 RT_MAKE_NAME(add_child_4)
226 #define RT_ADD_CHILD_16 RT_MAKE_NAME(add_child_16)
227 #define RT_ADD_CHILD_48 RT_MAKE_NAME(add_child_48)
228 #define RT_ADD_CHILD_256 RT_MAKE_NAME(add_child_256)
229 #define RT_GROW_NODE_4 RT_MAKE_NAME(grow_node_4)
230 #define RT_GROW_NODE_16 RT_MAKE_NAME(grow_node_16)
231 #define RT_GROW_NODE_48 RT_MAKE_NAME(grow_node_48)
232 #define RT_REMOVE_CHILD_4 RT_MAKE_NAME(remove_child_4)
233 #define RT_REMOVE_CHILD_16 RT_MAKE_NAME(remove_child_16)
234 #define RT_REMOVE_CHILD_48 RT_MAKE_NAME(remove_child_48)
235 #define RT_REMOVE_CHILD_256 RT_MAKE_NAME(remove_child_256)
236 #define RT_SHRINK_NODE_16 RT_MAKE_NAME(shrink_child_16)
237 #define RT_SHRINK_NODE_48 RT_MAKE_NAME(shrink_child_48)
238 #define RT_SHRINK_NODE_256 RT_MAKE_NAME(shrink_child_256)
239 #define RT_NODE_ITERATE_NEXT RT_MAKE_NAME(node_iterate_next)
240 #define RT_VERIFY_NODE RT_MAKE_NAME(verify_node)
241 
242 /* type declarations */
243 #define RT_RADIX_TREE RT_MAKE_NAME(radix_tree)
244 #define RT_RADIX_TREE_CONTROL RT_MAKE_NAME(radix_tree_control)
245 #define RT_ITER RT_MAKE_NAME(iter)
246 #ifdef RT_SHMEM
247 #define RT_HANDLE RT_MAKE_NAME(handle)
248 #endif
249 #define RT_NODE RT_MAKE_NAME(node)
250 #define RT_CHILD_PTR RT_MAKE_NAME(child_ptr)
251 #define RT_NODE_ITER RT_MAKE_NAME(node_iter)
252 #define RT_NODE_4 RT_MAKE_NAME(node_4)
253 #define RT_NODE_16 RT_MAKE_NAME(node_16)
254 #define RT_NODE_48 RT_MAKE_NAME(node_48)
255 #define RT_NODE_256 RT_MAKE_NAME(node_256)
256 #define RT_SIZE_CLASS RT_MAKE_NAME(size_class)
257 #define RT_SIZE_CLASS_ELEM RT_MAKE_NAME(size_class_elem)
258 #define RT_SIZE_CLASS_INFO RT_MAKE_NAME(size_class_info)
259 #define RT_CLASS_4 RT_MAKE_NAME(class_4)
260 #define RT_CLASS_16_LO RT_MAKE_NAME(class_32_min)
261 #define RT_CLASS_16_HI RT_MAKE_NAME(class_32_max)
262 #define RT_CLASS_48 RT_MAKE_NAME(class_48)
263 #define RT_CLASS_256 RT_MAKE_NAME(class_256)
264 
265 /* generate forward declarations necessary to use the radix tree */
266 #ifdef RT_DECLARE
267 
268 typedef struct RT_RADIX_TREE RT_RADIX_TREE;
269 typedef struct RT_ITER RT_ITER;
270 
271 #ifdef RT_SHMEM
272 typedef dsa_pointer RT_HANDLE;
273 #endif
274 
275 #ifdef RT_SHMEM
276 RT_SCOPE RT_RADIX_TREE *RT_CREATE(MemoryContext ctx, dsa_area *dsa, int tranche_id);
277 RT_SCOPE RT_RADIX_TREE *RT_ATTACH(dsa_area *dsa, dsa_pointer dp);
278 RT_SCOPE void RT_DETACH(RT_RADIX_TREE * tree);
279 RT_SCOPE RT_HANDLE RT_GET_HANDLE(RT_RADIX_TREE * tree);
280 RT_SCOPE void RT_LOCK_EXCLUSIVE(RT_RADIX_TREE * tree);
281 RT_SCOPE void RT_LOCK_SHARE(RT_RADIX_TREE * tree);
282 RT_SCOPE void RT_UNLOCK(RT_RADIX_TREE * tree);
283 #else
285 #endif
287 
289 RT_SCOPE bool RT_SET(RT_RADIX_TREE * tree, uint64 key, RT_VALUE_TYPE * value_p);
290 
291 #ifdef RT_USE_DELETE
292 RT_SCOPE bool RT_DELETE(RT_RADIX_TREE * tree, uint64 key);
293 #endif
294 
296 RT_SCOPE RT_VALUE_TYPE *RT_ITERATE_NEXT(RT_ITER * iter, uint64 *key_p);
297 RT_SCOPE void RT_END_ITERATE(RT_ITER * iter);
298 
300 
301 #ifdef RT_DEBUG
303 #endif
304 
305 #endif /* RT_DECLARE */
306 
307 
308 /* generate implementation of the radix tree */
309 #ifdef RT_DEFINE
310 
311 /* The number of bits encoded in one tree level */
312 #define RT_SPAN BITS_PER_BYTE
313 
314 /*
315  * The number of possible partial keys, and thus the maximum number of
316  * child pointers, for a node.
317  */
318 #define RT_NODE_MAX_SLOTS (1 << RT_SPAN)
319 
320 /* Mask for extracting a chunk from a key */
321 #define RT_CHUNK_MASK ((1 << RT_SPAN) - 1)
322 
323 /* Maximum shift needed to extract a chunk from a key */
324 #define RT_MAX_SHIFT RT_KEY_GET_SHIFT(UINT64_MAX)
325 
326 /* Maximum level a tree can reach for a key */
327 #define RT_MAX_LEVEL ((sizeof(uint64) * BITS_PER_BYTE) / RT_SPAN)
328 
329 /* Get a chunk from the key */
330 #define RT_GET_KEY_CHUNK(key, shift) ((uint8) (((key) >> (shift)) & RT_CHUNK_MASK))
331 
332 /* For accessing bitmaps */
333 #define RT_BM_IDX(x) ((x) / BITS_PER_BITMAPWORD)
334 #define RT_BM_BIT(x) ((x) % BITS_PER_BITMAPWORD)
335 
336 /*
337  * Node kinds
338  *
339  * The different node kinds are what make the tree "adaptive".
340  *
341  * Each node kind is associated with a different datatype and different
342  * search/set/delete/iterate algorithms adapted for its size. The largest
343  * kind, node256 is basically the same as a traditional radix tree,
344  * and would be most wasteful of memory when sparsely populated. The
345  * smaller nodes expend some additional CPU time to enable a smaller
346  * memory footprint.
347  *
348  * NOTE: There are 4 node kinds, and this should never be increased,
349  * for several reasons:
350  * 1. With 5 or more kinds, gcc tends to use a jump table for switch
351  * statements.
352  * 2. The 4 kinds can be represented with 2 bits, so we have the option
353  * in the future to tag the node pointer with the kind, even on
354  * platforms with 32-bit pointers. That would touch fewer cache lines
355  * during traversal and allow faster recovery from branch mispredicts.
356  * 3. We can have multiple size classes per node kind.
357  */
358 #define RT_NODE_KIND_4 0x00
359 #define RT_NODE_KIND_16 0x01
360 #define RT_NODE_KIND_48 0x02
361 #define RT_NODE_KIND_256 0x03
362 #define RT_NODE_KIND_COUNT 4
363 
364 /*
365  * Calculate the slab block size so that we can allocate at least 32 chunks
366  * from the block.
367  */
368 #define RT_SLAB_BLOCK_SIZE(size) \
369  Max(SLAB_DEFAULT_BLOCK_SIZE, pg_nextpower2_32(size * 32))
370 
371 /* Common header for all nodes */
372 typedef struct RT_NODE
373 {
374  /* Node kind, one per search/set algorithm */
376 
377  /*
378  * Max capacity for the current size class. Storing this in the node
379  * enables multiple size classes per node kind. uint8 is sufficient for
380  * all node kinds, because we only use this number to test if the node
381  * needs to grow. Since node256 never needs to grow, we let this overflow
382  * to zero.
383  */
385 
386  /*
387  * Number of children. uint8 is sufficient for all node kinds, because
388  * nodes shrink when this number gets lower than some threshold. Since
389  * node256 cannot possibly have zero children, we let the counter overflow
390  * and we interpret zero as "256" for this node kind.
391  */
394 
395 
396 /* pointer returned by allocation */
397 #ifdef RT_SHMEM
398 #define RT_PTR_ALLOC dsa_pointer
399 #define RT_INVALID_PTR_ALLOC InvalidDsaPointer
400 #define RT_PTR_ALLOC_IS_VALID(ptr) DsaPointerIsValid(ptr)
401 #else
402 #define RT_PTR_ALLOC RT_NODE *
403 #define RT_INVALID_PTR_ALLOC NULL
404 #define RT_PTR_ALLOC_IS_VALID(ptr) PointerIsValid(ptr)
405 #endif
406 
407 /*
408  * A convenience type used when we need to work with a DSA pointer as well
409  * as its local pointer. For local memory, both members are the same, so
410  * we use a union.
411  */
412 #ifdef RT_SHMEM
413 typedef struct RT_CHILD_PTR
414 #else
415 typedef union RT_CHILD_PTR
416 #endif
417 {
421 
422 
423 /*
424  * Helper macros and functions for value storage.
425  * We either embed values in the child slots of the last level
426  * node or store pointers to values to the child slots,
427  * depending on the value size.
428  */
429 
430 #ifdef RT_VARLEN_VALUE_SIZE
431 #define RT_GET_VALUE_SIZE(v) RT_VARLEN_VALUE_SIZE(v)
432 #else
433 #define RT_GET_VALUE_SIZE(v) sizeof(RT_VALUE_TYPE)
434 #endif
435 
436 /*
437  * Return true if the value can be stored in the child array
438  * of the lowest-level node, false otherwise.
439  */
440 static inline bool
442 {
443 #ifdef RT_VARLEN_VALUE_SIZE
444 
445 #ifdef RT_RUNTIME_EMBEDDABLE_VALUE
446  return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
447 #else
448  return false;
449 #endif
450 
451 #else
452  return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
453 #endif
454 }
455 
456 /*
457  * Return true if the child pointer contains the value, false
458  * if the child pointer is a leaf pointer.
459  */
460 static inline bool
462 {
463 #ifdef RT_VARLEN_VALUE_SIZE
464 
465 #ifdef RT_RUNTIME_EMBEDDABLE_VALUE
466  /* check for pointer tag */
467 #ifdef RT_SHMEM
468  return child & 1;
469 #else
470  return ((uintptr_t) child) & 1;
471 #endif
472 
473 #else
474  return false;
475 #endif
476 
477 #else
478  return sizeof(RT_VALUE_TYPE) <= sizeof(RT_PTR_ALLOC);
479 #endif
480 }
481 
482 /*
483  * Symbols for maximum possible fanout are declared first as they are
484  * required to declare each node kind. The declarations of other fanout
485  * values are followed as they need the struct sizes of each node kind.
486  */
487 
488 /* max possible key chunks without struct padding */
489 #define RT_FANOUT_4_MAX (8 - sizeof(RT_NODE))
490 
491 /* equal to two 128-bit SIMD registers, regardless of availability */
492 #define RT_FANOUT_16_MAX 32
493 
494 /*
495  * This also determines the number of bits necessary for the isset array,
496  * so we need to be mindful of the size of bitmapword. Since bitmapword
497  * can be 64 bits, the only values that make sense here are 64 and 128.
498  * The ART paper uses at most 64 for this node kind, and one advantage
499  * for us is that "isset" is a single bitmapword on most platforms,
500  * rather than an array, allowing the compiler to get rid of loops.
501  */
502 #define RT_FANOUT_48_MAX 64
503 
504 #define RT_FANOUT_256 RT_NODE_MAX_SLOTS
505 
506 /*
507  * Node structs, one for each "kind"
508  */
509 
510 /*
511  * node4 and node16 use one array for key chunks and another
512  * array of the same length for children. The keys and children
513  * are stored at corresponding positions, sorted by chunk.
514  */
515 
516 typedef struct RT_NODE_4
517 {
519 
521 
522  /* number of children depends on size class */
525 
526 typedef struct RT_NODE_16
527 {
529 
531 
532  /* number of children depends on size class */
535 
536 /*
537  * node48 uses a 256-element array indexed by key chunks. This array
538  * stores indexes into a second array containing the children.
539  */
540 typedef struct RT_NODE_48
541 {
543 
544  /* bitmap to track which slots are in use */
546 
547  /*
548  * Lookup table for indexes into the children[] array. We make this the
549  * last fixed-size member so that it's convenient to memset separately
550  * from the previous members.
551  */
553 
554 /* Invalid index */
555 #define RT_INVALID_SLOT_IDX 0xFF
556 
557  /* number of children depends on size class */
560 
561 /*
562  * node256 is the largest node type. This node has an array of
563  * children directly indexed by chunk. Unlike other node kinds,
564  * its array size is by definition fixed.
565  */
566 typedef struct RT_NODE_256
567 {
569 
570  /* bitmap to track which slots are in use */
572 
573  /* slots for 256 children */
576 
577 #if defined(RT_SHMEM)
578 /*
579  * Make sure the all nodes (except for node256) fit neatly into a DSA
580  * size class. We assume the RT_FANOUT_4 is in the range where DSA size
581  * classes increment by 8 (as of PG17 up to 64 bytes), so we just hard
582  * code that one.
583  */
584 
585 #if SIZEOF_DSA_POINTER < 8
586 #define RT_FANOUT_16_LO ((96 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
587 #define RT_FANOUT_16_HI Min(RT_FANOUT_16_MAX, (160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
588 #define RT_FANOUT_48 Min(RT_FANOUT_48_MAX, (512 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC))
589 #else
590 #define RT_FANOUT_16_LO ((160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
591 #define RT_FANOUT_16_HI Min(RT_FANOUT_16_MAX, (320 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
592 #define RT_FANOUT_48 Min(RT_FANOUT_48_MAX, (768 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC))
593 #endif /* SIZEOF_DSA_POINTER < 8 */
594 
595 #else /* ! RT_SHMEM */
596 
597 /* doesn't really matter, but may as well use the namesake */
598 #define RT_FANOUT_16_LO 16
599 /* use maximum possible */
600 #define RT_FANOUT_16_HI RT_FANOUT_16_MAX
601 #define RT_FANOUT_48 RT_FANOUT_48_MAX
602 
603 #endif /* RT_SHMEM */
604 
605 /*
606  * To save memory in trees with sparse keys, it would make sense to have two
607  * size classes for the smallest kind (perhaps a high class of 5 and a low class
608  * of 2), but it would be more effective to utilize lazy expansion and
609  * path compression.
610  */
611 #define RT_FANOUT_4 4
612 
613 StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_MAX, "watch struct padding");
614 StaticAssertDecl(RT_FANOUT_16_LO < RT_FANOUT_16_HI, "LO subclass bigger than HI");
615 StaticAssertDecl(RT_FANOUT_48 <= RT_FANOUT_48_MAX, "more slots than isset bits");
616 
617 /*
618  * Node size classes
619  *
620  * Nodes of different kinds necessarily belong to different size classes.
621  * One innovation in our implementation compared to the ART paper is
622  * decoupling the notion of size class from kind.
623  *
624  * The size classes within a given node kind have the same underlying
625  * type, but a variable number of children/values. This is possible
626  * because each type (except node256) contains metadata that work the
627  * same way regardless of how many child slots there are. The nodes
628  * can introspect their allocated capacity at runtime.
629  */
630 typedef enum RT_SIZE_CLASS
631 {
638 
639 /* Information for each size class */
640 typedef struct RT_SIZE_CLASS_ELEM
641 {
642  const char *name;
643  int fanout;
644  size_t allocsize;
646 
647 
649  [RT_CLASS_4] = {
650  .name = RT_STR(RT_PREFIX) "_radix_tree node4",
651  .fanout = RT_FANOUT_4,
652  .allocsize = sizeof(RT_NODE_4) + RT_FANOUT_4 * sizeof(RT_PTR_ALLOC),
653  },
654  [RT_CLASS_16_LO] = {
655  .name = RT_STR(RT_PREFIX) "_radix_tree node16_lo",
656  .fanout = RT_FANOUT_16_LO,
657  .allocsize = sizeof(RT_NODE_16) + RT_FANOUT_16_LO * sizeof(RT_PTR_ALLOC),
658  },
659  [RT_CLASS_16_HI] = {
660  .name = RT_STR(RT_PREFIX) "_radix_tree node16_hi",
661  .fanout = RT_FANOUT_16_HI,
662  .allocsize = sizeof(RT_NODE_16) + RT_FANOUT_16_HI * sizeof(RT_PTR_ALLOC),
663  },
664  [RT_CLASS_48] = {
665  .name = RT_STR(RT_PREFIX) "_radix_tree node48",
666  .fanout = RT_FANOUT_48,
667  .allocsize = sizeof(RT_NODE_48) + RT_FANOUT_48 * sizeof(RT_PTR_ALLOC),
668  },
669  [RT_CLASS_256] = {
670  .name = RT_STR(RT_PREFIX) "_radix_tree node256",
671  .fanout = RT_FANOUT_256,
672  .allocsize = sizeof(RT_NODE_256),
673  },
674 };
675 
676 #define RT_NUM_SIZE_CLASSES lengthof(RT_SIZE_CLASS_INFO)
677 
678 #ifdef RT_SHMEM
679 /* A magic value used to identify our radix tree */
680 #define RT_RADIX_TREE_MAGIC 0x54A48167
681 #endif
682 
683 /* Contains the actual tree, plus ancillary info */
684 typedef struct RT_RADIX_TREE_CONTROL
685 {
686 #ifdef RT_SHMEM
687  RT_HANDLE handle;
688  uint32 magic;
689  LWLock lock;
690 #endif
691 
693  uint64 max_val;
694  int64 num_keys;
696 
697  /* statistics */
698 #ifdef RT_DEBUG
699  int64 num_nodes[RT_NUM_SIZE_CLASSES];
700  int64 num_leaves;
701 #endif
703 
704 /* Entry point for allocating and accessing the tree */
706 {
708 
709  /* pointing to either local memory or DSA */
711 
712 #ifdef RT_SHMEM
713  dsa_area *dsa;
714 #else
716 
717  /* leaf_context is used only for single-value leaves */
719 #endif
721 };
722 
723 /*
724  * Iteration support.
725  *
726  * Iterating over the radix tree produces each key/value pair in ascending
727  * order of the key.
728  */
729 
730 /* state for iterating over a single node */
731 typedef struct RT_NODE_ITER
732 {
734 
735  /*
736  * The next index of the chunk array in RT_NODE_KIND_4 and RT_NODE_KIND_16
737  * nodes, or the next chunk in RT_NODE_KIND_48 and RT_NODE_KIND_256 nodes.
738  * 0 for the initial value.
739  */
740  int idx;
742 
743 /* state for iterating over the whole radix tree */
744 struct RT_ITER
745 {
747 
748  /*
749  * A stack to track iteration for each level. Level 0 is the lowest (or
750  * leaf) level
751  */
755 
756  /* The key constructed during iteration */
757  uint64 key;
758 };
759 
760 
761 /* verification (available only in assert-enabled builds) */
762 static void RT_VERIFY_NODE(RT_NODE * node);
763 
764 static inline void
766 {
767 #ifdef RT_SHMEM
768  node->local = dsa_get_address(tree->dsa, node->alloc);
769 #endif
770 }
771 
772 /* Convenience functions for node48 and node256 */
773 
774 /* Return true if there is an entry for "chunk" */
775 static inline bool
777 {
778  return node->slot_idxs[chunk] != RT_INVALID_SLOT_IDX;
779 }
780 
781 static inline RT_PTR_ALLOC *
783 {
784  return &node->children[node->slot_idxs[chunk]];
785 }
786 
787 /* Return true if there is an entry for "chunk" */
788 static inline bool
790 {
791  int idx = RT_BM_IDX(chunk);
792  int bitnum = RT_BM_BIT(chunk);
793 
794  return (node->isset[idx] & ((bitmapword) 1 << bitnum)) != 0;
795 }
796 
797 static inline RT_PTR_ALLOC *
799 {
801  return &node->children[chunk];
802 }
803 
804 /*
805  * Return the smallest shift that will allowing storing the given key.
806  */
807 static inline int
809 {
810  if (key == 0)
811  return 0;
812  else
814 }
815 
816 /*
817  * Return the max value that can be stored in the tree with the given shift.
818  */
819 static uint64
821 {
822  if (shift == RT_MAX_SHIFT)
823  return UINT64_MAX;
824  else
825  return (UINT64CONST(1) << (shift + RT_SPAN)) - 1;
826 }
827 
828 /*
829  * Allocate a new node with the given node kind and size class.
830  */
831 static inline RT_CHILD_PTR
832 RT_ALLOC_NODE(RT_RADIX_TREE * tree, const uint8 kind, const RT_SIZE_CLASS size_class)
833 {
834  RT_CHILD_PTR allocnode;
835  RT_NODE *node;
836  size_t allocsize;
837 
838  allocsize = RT_SIZE_CLASS_INFO[size_class].allocsize;
839 
840 #ifdef RT_SHMEM
841  allocnode.alloc = dsa_allocate(tree->dsa, allocsize);
842 #else
843  allocnode.alloc = (RT_PTR_ALLOC) MemoryContextAlloc(tree->node_slabs[size_class],
844  allocsize);
845 #endif
846 
847  RT_PTR_SET_LOCAL(tree, &allocnode);
848  node = allocnode.local;
849 
850  /* initialize contents */
851 
852  switch (kind)
853  {
854  case RT_NODE_KIND_4:
855  memset(node, 0, offsetof(RT_NODE_4, children));
856  break;
857  case RT_NODE_KIND_16:
858  memset(node, 0, offsetof(RT_NODE_16, children));
859  break;
860  case RT_NODE_KIND_48:
861  {
862  RT_NODE_48 *n48 = (RT_NODE_48 *) node;
863 
864  memset(n48, 0, offsetof(RT_NODE_48, slot_idxs));
865  memset(n48->slot_idxs, RT_INVALID_SLOT_IDX, sizeof(n48->slot_idxs));
866  break;
867  }
868  case RT_NODE_KIND_256:
869  memset(node, 0, offsetof(RT_NODE_256, children));
870  break;
871  default:
872  pg_unreachable();
873  }
874 
875  node->kind = kind;
876 
877  /*
878  * For node256, this will actually overflow to zero, but that's okay
879  * because that node doesn't need to introspect this value.
880  */
881  node->fanout = RT_SIZE_CLASS_INFO[size_class].fanout;
882 
883 #ifdef RT_DEBUG
884  /* update the statistics */
885  tree->ctl->num_nodes[size_class]++;
886 #endif
887 
888  return allocnode;
889 }
890 
891 /*
892  * Allocate a new leaf.
893  */
894 static RT_CHILD_PTR
895 RT_ALLOC_LEAF(RT_RADIX_TREE * tree, size_t allocsize)
896 {
897  RT_CHILD_PTR leaf;
898 
899 #ifdef RT_SHMEM
900  leaf.alloc = dsa_allocate(tree->dsa, allocsize);
901  RT_PTR_SET_LOCAL(tree, &leaf);
902 #else
903  leaf.alloc = (RT_PTR_ALLOC) MemoryContextAlloc(tree->leaf_context, allocsize);
904 #endif
905 
906 #ifdef RT_DEBUG
907  tree->ctl->num_leaves++;
908 #endif
909 
910  return leaf;
911 }
912 
913 /*
914  * Copy relevant members of the node header.
915  * This is a separate function in case other fields are added.
916  */
917 static inline void
919 {
920  (newnode.local)->count = (oldnode.local)->count;
921 }
922 
923 /* Free the given node */
924 static void
926 {
927 #ifdef RT_DEBUG
928  int i;
929 
930  /* update the statistics */
931 
932  for (i = 0; i < RT_NUM_SIZE_CLASSES; i++)
933  {
934  if ((node.local)->fanout == RT_SIZE_CLASS_INFO[i].fanout)
935  break;
936  }
937 
938  /*
939  * The fanout of node256 will appear to be zero within the node header
940  * because of overflow, so we need an extra check here.
941  */
942  if (i == RT_NUM_SIZE_CLASSES)
943  i = RT_CLASS_256;
944 
945  tree->ctl->num_nodes[i]--;
946  Assert(tree->ctl->num_nodes[i] >= 0);
947 #endif
948 
949 #ifdef RT_SHMEM
950  dsa_free(tree->dsa, node.alloc);
951 #else
952  pfree(node.alloc);
953 #endif
954 }
955 
956 static inline void
958 {
959  Assert(leaf != tree->ctl->root);
960 
961 #ifdef RT_DEBUG
962  /* update the statistics */
963  tree->ctl->num_leaves--;
964  Assert(tree->ctl->num_leaves >= 0);
965 #endif
966 
967 #ifdef RT_SHMEM
968  dsa_free(tree->dsa, leaf);
969 #else
970  pfree(leaf);
971 #endif
972 }
973 
974 /***************** SEARCH *****************/
975 
976 /*
977  * Return the address of the child corresponding to "chunk",
978  * or NULL if there is no such element.
979  */
980 static inline RT_PTR_ALLOC *
982 {
983  int count = node->base.count;
984 #ifndef USE_NO_SIMD
985  Vector8 spread_chunk;
986  Vector8 haystack1;
987  Vector8 haystack2;
988  Vector8 cmp1;
989  Vector8 cmp2;
990  uint32 bitfield;
991  RT_PTR_ALLOC *slot_simd = NULL;
992 #endif
993 
994 #if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
995  RT_PTR_ALLOC *slot = NULL;
996 
997  for (int i = 0; i < count; i++)
998  {
999  if (node->chunks[i] == chunk)
1000  {
1001  slot = &node->children[i];
1002  break;
1003  }
1004  }
1005 #endif
1006 
1007 #ifndef USE_NO_SIMD
1008  /* replicate the search key */
1009  spread_chunk = vector8_broadcast(chunk);
1010 
1011  /* compare to all 32 keys stored in the node */
1012  vector8_load(&haystack1, &node->chunks[0]);
1013  vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]);
1014  cmp1 = vector8_eq(spread_chunk, haystack1);
1015  cmp2 = vector8_eq(spread_chunk, haystack2);
1016 
1017  /* convert comparison to a bitfield */
1018  bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8));
1019 
1020  /* mask off invalid entries */
1021  bitfield &= ((UINT64CONST(1) << count) - 1);
1022 
1023  /* convert bitfield to index by counting trailing zeros */
1024  if (bitfield)
1025  slot_simd = &node->children[pg_rightmost_one_pos32(bitfield)];
1026 
1027  Assert(slot_simd == slot);
1028  return slot_simd;
1029 #else
1030  return slot;
1031 #endif
1032 }
1033 
1034 /*
1035  * Search for the child pointer corresponding to "key" in the given node.
1036  *
1037  * Return child if the key is found, otherwise return NULL.
1038  */
1039 static inline RT_PTR_ALLOC *
1041 {
1042  /* Make sure we already converted to local pointer */
1043  Assert(node != NULL);
1044 
1045  switch (node->kind)
1046  {
1047  case RT_NODE_KIND_4:
1048  {
1049  RT_NODE_4 *n4 = (RT_NODE_4 *) node;
1050 
1051  for (int i = 0; i < n4->base.count; i++)
1052  {
1053  if (n4->chunks[i] == chunk)
1054  return &n4->children[i];
1055  }
1056  return NULL;
1057  }
1058  case RT_NODE_KIND_16:
1059  return RT_NODE_16_SEARCH_EQ((RT_NODE_16 *) node, chunk);
1060  case RT_NODE_KIND_48:
1061  {
1062  RT_NODE_48 *n48 = (RT_NODE_48 *) node;
1063  int slotpos = n48->slot_idxs[chunk];
1064 
1065  if (slotpos == RT_INVALID_SLOT_IDX)
1066  return NULL;
1067 
1068  return RT_NODE_48_GET_CHILD(n48, chunk);
1069  }
1070  case RT_NODE_KIND_256:
1071  {
1072  RT_NODE_256 *n256 = (RT_NODE_256 *) node;
1073 
1074  if (!RT_NODE_256_IS_CHUNK_USED(n256, chunk))
1075  return NULL;
1076 
1077  return RT_NODE_256_GET_CHILD(n256, chunk);
1078  }
1079  default:
1080  pg_unreachable();
1081  }
1082 }
1083 
1084 /*
1085  * Search the given key in the radix tree. Return the pointer to the value if found,
1086  * otherwise return NULL.
1087  *
1088  * Since the function returns a pointer (to support variable-length values),
1089  * the caller is responsible for locking until it's finished with the value.
1090  */
1093 {
1094  RT_CHILD_PTR node;
1095  RT_PTR_ALLOC *slot = NULL;
1096  int shift;
1097 
1098 #ifdef RT_SHMEM
1099  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1100 #endif
1101 
1102  if (key > tree->ctl->max_val)
1103  return NULL;
1104 
1105  Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
1106  node.alloc = tree->ctl->root;
1107  shift = tree->ctl->start_shift;
1108 
1109  /* Descend the tree */
1110  while (shift >= 0)
1111  {
1112  RT_PTR_SET_LOCAL(tree, &node);
1113  slot = RT_NODE_SEARCH(node.local, RT_GET_KEY_CHUNK(key, shift));
1114  if (slot == NULL)
1115  return NULL;
1116 
1117  node.alloc = *slot;
1118  shift -= RT_SPAN;
1119  }
1120 
1121  if (RT_CHILDPTR_IS_VALUE(*slot))
1122  return (RT_VALUE_TYPE *) slot;
1123  else
1124  {
1125  RT_PTR_SET_LOCAL(tree, &node);
1126  return (RT_VALUE_TYPE *) node.local;
1127  }
1128 }
1129 
1130 /***************** INSERTION *****************/
1131 
1132 #define RT_NODE_MUST_GROW(node) \
1133  ((node)->count == (node)->fanout)
1134 
1135 /*
1136  * Return index of the chunk and slot arrays for inserting into the node,
1137  * such that the arrays remain ordered.
1138  */
1139 static inline int
1141 {
1142  int idx;
1143 
1144  for (idx = 0; idx < count; idx++)
1145  {
1146  if (node->chunks[idx] >= chunk)
1147  break;
1148  }
1149 
1150  return idx;
1151 }
1152 
1153 /*
1154  * Return index of the chunk and slot arrays for inserting into the node,
1155  * such that the arrays remain ordered.
1156  */
1157 static inline int
1159 {
1160  int count = node->base.count;
1161 #if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
1162  int index;
1163 #endif
1164 
1165 #ifndef USE_NO_SIMD
1166  Vector8 spread_chunk;
1167  Vector8 haystack1;
1168  Vector8 haystack2;
1169  Vector8 cmp1;
1170  Vector8 cmp2;
1171  Vector8 min1;
1172  Vector8 min2;
1173  uint32 bitfield;
1174  int index_simd;
1175 #endif
1176 
1177  /*
1178  * First compare the last element. There are two reasons to branch here:
1179  *
1180  * 1) A realistic pattern is inserting ordered keys. In that case,
1181  * non-SIMD platforms must do a linear search to the last chunk to find
1182  * the insert position. This will get slower as the node fills up.
1183  *
1184  * 2) On SIMD platforms, we must branch anyway to make sure we don't bit
1185  * scan an empty bitfield. Doing the branch here eliminates some work that
1186  * we might otherwise throw away.
1187  */
1188  Assert(count > 0);
1189  if (node->chunks[count - 1] < chunk)
1190  return count;
1191 
1192 #if defined(USE_NO_SIMD) || defined(USE_ASSERT_CHECKING)
1193 
1194  for (index = 0; index < count; index++)
1195  {
1196  if (node->chunks[index] > chunk)
1197  break;
1198  }
1199 #endif
1200 
1201 #ifndef USE_NO_SIMD
1202 
1203  /*
1204  * This is a bit more complicated than RT_NODE_16_SEARCH_EQ(), because no
1205  * unsigned uint8 comparison instruction exists, at least for SSE2. So we
1206  * need to play some trickery using vector8_min() to effectively get >=.
1207  * There'll never be any equal elements in current uses, but that's what
1208  * we get here...
1209  */
1210  spread_chunk = vector8_broadcast(chunk);
1211  vector8_load(&haystack1, &node->chunks[0]);
1212  vector8_load(&haystack2, &node->chunks[sizeof(Vector8)]);
1213  min1 = vector8_min(spread_chunk, haystack1);
1214  min2 = vector8_min(spread_chunk, haystack2);
1215  cmp1 = vector8_eq(spread_chunk, min1);
1216  cmp2 = vector8_eq(spread_chunk, min2);
1217  bitfield = vector8_highbit_mask(cmp1) | (vector8_highbit_mask(cmp2) << sizeof(Vector8));
1218 
1219  Assert((bitfield & ((UINT64CONST(1) << count) - 1)) != 0);
1220  index_simd = pg_rightmost_one_pos32(bitfield);
1221 
1222  Assert(index_simd == index);
1223  return index_simd;
1224 #else
1225  return index;
1226 #endif
1227 }
1228 
1229 /* Shift the elements right at "insertpos" by one */
1230 static inline void
1231 RT_SHIFT_ARRAYS_FOR_INSERT(uint8 *chunks, RT_PTR_ALLOC * children, int count, int insertpos)
1232 {
1233  /*
1234  * This is basically a memmove, but written in a simple loop for speed on
1235  * small inputs.
1236  */
1237  for (int i = count - 1; i >= insertpos; i--)
1238  {
1239  /* workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101481 */
1240 #ifdef __GNUC__
1241  __asm__("");
1242 #endif
1243  chunks[i + 1] = chunks[i];
1244  children[i + 1] = children[i];
1245  }
1246 }
1247 
1248 /*
1249  * Copy both chunk and slot arrays into the right
1250  * place. The caller is responsible for inserting the new element.
1251  */
1252 static inline void
1253 RT_COPY_ARRAYS_FOR_INSERT(uint8 *dst_chunks, RT_PTR_ALLOC * dst_children,
1254  uint8 *src_chunks, RT_PTR_ALLOC * src_children,
1255  int count, int insertpos)
1256 {
1257  for (int i = 0; i < count; i++)
1258  {
1259  int sourceidx = i;
1260 
1261  /* use a branch-free computation to skip the index of the new element */
1262  int destidx = i + (i >= insertpos);
1263 
1264  dst_chunks[destidx] = src_chunks[sourceidx];
1265  dst_children[destidx] = src_children[sourceidx];
1266  }
1267 }
1268 
1269 static inline RT_PTR_ALLOC *
1271 {
1272  RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
1273  int idx = RT_BM_IDX(chunk);
1274  int bitnum = RT_BM_BIT(chunk);
1275 
1276  /* Mark the slot used for "chunk" */
1277  n256->isset[idx] |= ((bitmapword) 1 << bitnum);
1278 
1279  n256->base.count++;
1280  RT_VERIFY_NODE((RT_NODE *) n256);
1281 
1282  return RT_NODE_256_GET_CHILD(n256, chunk);
1283 }
1284 
1285 static pg_noinline RT_PTR_ALLOC *
1287  uint8 chunk)
1288 {
1289  RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
1290  RT_CHILD_PTR newnode;
1291  RT_NODE_256 *new256;
1292  int i = 0;
1293 
1294  /* initialize new node */
1296  new256 = (RT_NODE_256 *) newnode.local;
1297 
1298  /* copy over the entries */
1299  RT_COPY_COMMON(newnode, node);
1300  for (int word_num = 0; word_num < RT_BM_IDX(RT_NODE_MAX_SLOTS); word_num++)
1301  {
1302  bitmapword bitmap = 0;
1303 
1304  /*
1305  * Bit manipulation is a surprisingly large portion of the overhead in
1306  * the naive implementation. Doing stores word-at-a-time removes a lot
1307  * of that overhead.
1308  */
1309  for (int bit = 0; bit < BITS_PER_BITMAPWORD; bit++)
1310  {
1311  uint8 offset = n48->slot_idxs[i];
1312 
1313  if (offset != RT_INVALID_SLOT_IDX)
1314  {
1315  bitmap |= ((bitmapword) 1 << bit);
1316  new256->children[i] = n48->children[offset];
1317  }
1318 
1319  i++;
1320  }
1321 
1322  new256->isset[word_num] = bitmap;
1323  }
1324 
1325  /* free old node and update reference in parent */
1326  *parent_slot = newnode.alloc;
1327  RT_FREE_NODE(tree, node);
1328 
1329  return RT_ADD_CHILD_256(tree, newnode, chunk);
1330 }
1331 
1332 static inline RT_PTR_ALLOC *
1334 {
1335  RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
1336  int insertpos;
1337  int idx = 0;
1338  bitmapword w,
1339  inverse;
1340 
1341  /* get the first word with at least one bit not set */
1342  for (int i = 0; i < RT_BM_IDX(RT_FANOUT_48_MAX); i++)
1343  {
1344  w = n48->isset[i];
1345  if (w < ~((bitmapword) 0))
1346  {
1347  idx = i;
1348  break;
1349  }
1350  }
1351 
1352  /* To get the first unset bit in w, get the first set bit in ~w */
1353  inverse = ~w;
1354  insertpos = idx * BITS_PER_BITMAPWORD;
1355  insertpos += bmw_rightmost_one_pos(inverse);
1356  Assert(insertpos < n48->base.fanout);
1357 
1358  /* mark the slot used by setting the rightmost zero bit */
1359  n48->isset[idx] |= w + 1;
1360 
1361  /* insert new chunk into place */
1362  n48->slot_idxs[chunk] = insertpos;
1363 
1364  n48->base.count++;
1365  RT_VERIFY_NODE((RT_NODE *) n48);
1366 
1367  return &n48->children[insertpos];
1368 }
1369 
1370 static pg_noinline RT_PTR_ALLOC *
1372  uint8 chunk)
1373 {
1374  RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1375  int insertpos;
1376 
1377  if (n16->base.fanout < RT_FANOUT_16_HI)
1378  {
1379  RT_CHILD_PTR newnode;
1380  RT_NODE_16 *new16;
1381 
1382  Assert(n16->base.fanout == RT_FANOUT_16_LO);
1383 
1384  /* initialize new node */
1386  new16 = (RT_NODE_16 *) newnode.local;
1387 
1388  /* copy over existing entries */
1389  RT_COPY_COMMON(newnode, node);
1390  Assert(n16->base.count == RT_FANOUT_16_LO);
1391  insertpos = RT_NODE_16_GET_INSERTPOS(n16, chunk);
1392  RT_COPY_ARRAYS_FOR_INSERT(new16->chunks, new16->children,
1393  n16->chunks, n16->children,
1394  RT_FANOUT_16_LO, insertpos);
1395 
1396  /* insert new chunk into place */
1397  new16->chunks[insertpos] = chunk;
1398 
1399  new16->base.count++;
1400  RT_VERIFY_NODE((RT_NODE *) new16);
1401 
1402  /* free old node and update references */
1403  RT_FREE_NODE(tree, node);
1404  *parent_slot = newnode.alloc;
1405 
1406  return &new16->children[insertpos];
1407  }
1408  else
1409  {
1410  RT_CHILD_PTR newnode;
1411  RT_NODE_48 *new48;
1412  int idx,
1413  bit;
1414 
1415  Assert(n16->base.fanout == RT_FANOUT_16_HI);
1416 
1417  /* initialize new node */
1419  new48 = (RT_NODE_48 *) newnode.local;
1420 
1421  /* copy over the entries */
1422  RT_COPY_COMMON(newnode, node);
1423  for (int i = 0; i < RT_FANOUT_16_HI; i++)
1424  new48->slot_idxs[n16->chunks[i]] = i;
1425  memcpy(&new48->children[0], &n16->children[0], RT_FANOUT_16_HI * sizeof(new48->children[0]));
1426 
1427  /*
1428  * Since we just copied a dense array, we can fill "isset" using a
1429  * single store, provided the length of that array is at most the
1430  * number of bits in a bitmapword.
1431  */
1433  new48->isset[0] = (bitmapword) (((uint64) 1 << RT_FANOUT_16_HI) - 1);
1434 
1435  /* put the new child at the end of the copied entries */
1436  insertpos = RT_FANOUT_16_HI;
1437  idx = RT_BM_IDX(insertpos);
1438  bit = RT_BM_BIT(insertpos);
1439 
1440  /* mark the slot used */
1441  new48->isset[idx] |= ((bitmapword) 1 << bit);
1442 
1443  /* insert new chunk into place */
1444  new48->slot_idxs[chunk] = insertpos;
1445 
1446  new48->base.count++;
1447  RT_VERIFY_NODE((RT_NODE *) new48);
1448 
1449  /* free old node and update reference in parent */
1450  *parent_slot = newnode.alloc;
1451  RT_FREE_NODE(tree, node);
1452 
1453  return &new48->children[insertpos];
1454  }
1455 }
1456 
1457 static inline RT_PTR_ALLOC *
1459 {
1460  RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1461  int insertpos = RT_NODE_16_GET_INSERTPOS(n16, chunk);
1462 
1463  /* shift chunks and children */
1465  n16->base.count, insertpos);
1466 
1467  /* insert new chunk into place */
1468  n16->chunks[insertpos] = chunk;
1469 
1470  n16->base.count++;
1471  RT_VERIFY_NODE((RT_NODE *) n16);
1472 
1473  return &n16->children[insertpos];
1474 }
1475 
1476 static pg_noinline RT_PTR_ALLOC *
1478  uint8 chunk)
1479 {
1480  RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
1481  RT_CHILD_PTR newnode;
1482  RT_NODE_16 *new16;
1483  int insertpos;
1484 
1485  /* initialize new node */
1487  new16 = (RT_NODE_16 *) newnode.local;
1488 
1489  /* copy over existing entries */
1490  RT_COPY_COMMON(newnode, node);
1491  Assert(n4->base.count == RT_FANOUT_4);
1492  insertpos = RT_NODE_4_GET_INSERTPOS(n4, chunk, RT_FANOUT_4);
1493  RT_COPY_ARRAYS_FOR_INSERT(new16->chunks, new16->children,
1494  n4->chunks, n4->children,
1495  RT_FANOUT_4, insertpos);
1496 
1497  /* insert new chunk into place */
1498  new16->chunks[insertpos] = chunk;
1499 
1500  new16->base.count++;
1501  RT_VERIFY_NODE((RT_NODE *) new16);
1502 
1503  /* free old node and update reference in parent */
1504  *parent_slot = newnode.alloc;
1505  RT_FREE_NODE(tree, node);
1506 
1507  return &new16->children[insertpos];
1508 }
1509 
1510 static inline RT_PTR_ALLOC *
1512 {
1513  RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
1514  int count = n4->base.count;
1515  int insertpos = RT_NODE_4_GET_INSERTPOS(n4, chunk, count);
1516 
1517  /* shift chunks and children */
1519  count, insertpos);
1520 
1521  /* insert new chunk into place */
1522  n4->chunks[insertpos] = chunk;
1523 
1524  n4->base.count++;
1525  RT_VERIFY_NODE((RT_NODE *) n4);
1526 
1527  return &n4->children[insertpos];
1528 }
1529 
1530 /*
1531  * Reserve slot in "node"'s child array. The caller will populate it
1532  * with the actual child pointer.
1533  *
1534  * "parent_slot" is the address of the parent's child pointer to "node".
1535  * If the node we're inserting into needs to grow, we update the parent's
1536  * child pointer with the pointer to the new larger node.
1537  */
1538 static inline RT_PTR_ALLOC *
1540  uint8 chunk)
1541 {
1542  RT_NODE *n = node.local;
1543 
1544  switch (n->kind)
1545  {
1546  case RT_NODE_KIND_4:
1547  {
1548  if (unlikely(RT_NODE_MUST_GROW(n)))
1549  return RT_GROW_NODE_4(tree, parent_slot, node, chunk);
1550 
1551  return RT_ADD_CHILD_4(tree, node, chunk);
1552  }
1553  case RT_NODE_KIND_16:
1554  {
1555  if (unlikely(RT_NODE_MUST_GROW(n)))
1556  return RT_GROW_NODE_16(tree, parent_slot, node, chunk);
1557 
1558  return RT_ADD_CHILD_16(tree, node, chunk);
1559  }
1560  case RT_NODE_KIND_48:
1561  {
1562  if (unlikely(RT_NODE_MUST_GROW(n)))
1563  return RT_GROW_NODE_48(tree, parent_slot, node, chunk);
1564 
1565  return RT_ADD_CHILD_48(tree, node, chunk);
1566  }
1567  case RT_NODE_KIND_256:
1568  return RT_ADD_CHILD_256(tree, node, chunk);
1569  default:
1570  pg_unreachable();
1571  }
1572 }
1573 
1574 /*
1575  * The radix tree doesn't have sufficient height. Put new node(s) on top,
1576  * and move the old node below it.
1577  */
1578 static pg_noinline void
1580 {
1581  int target_shift = RT_KEY_GET_SHIFT(key);
1582  int shift = tree->ctl->start_shift;
1583 
1584  Assert(shift < target_shift);
1585 
1586  /* Grow tree upwards until start shift can accommodate the key */
1587  while (shift < target_shift)
1588  {
1589  RT_CHILD_PTR node;
1590  RT_NODE_4 *n4;
1591 
1593  n4 = (RT_NODE_4 *) node.local;
1594  n4->base.count = 1;
1595  n4->chunks[0] = 0;
1596  n4->children[0] = tree->ctl->root;
1597 
1598  /* Update the root */
1599  tree->ctl->root = node.alloc;
1600 
1601  shift += RT_SPAN;
1602  }
1603 
1604  tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(target_shift);
1605  tree->ctl->start_shift = target_shift;
1606 }
1607 
1608 /*
1609  * Insert a chain of nodes until we reach the lowest level,
1610  * and return the address of a slot to be filled further up
1611  * the call stack.
1612  */
1613 static pg_noinline RT_PTR_ALLOC *
1614 RT_EXTEND_DOWN(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift)
1615 {
1616  RT_CHILD_PTR node,
1617  child;
1618  RT_NODE_4 *n4;
1619 
1620  /*
1621  * The child pointer of the first node in the chain goes in the
1622  * caller-provided slot.
1623  */
1625  *parent_slot = child.alloc;
1626 
1627  node = child;
1628  shift -= RT_SPAN;
1629 
1630  while (shift > 0)
1631  {
1633 
1634  /* We open-code the insertion ourselves, for speed. */
1635  n4 = (RT_NODE_4 *) node.local;
1636  n4->base.count = 1;
1637  n4->chunks[0] = RT_GET_KEY_CHUNK(key, shift);
1638  n4->children[0] = child.alloc;
1639 
1640  node = child;
1641  shift -= RT_SPAN;
1642  }
1643  Assert(shift == 0);
1644 
1645  /* Reserve slot for the value. */
1646  n4 = (RT_NODE_4 *) node.local;
1647  n4->chunks[0] = RT_GET_KEY_CHUNK(key, 0);
1648  n4->base.count = 1;
1649 
1650  return &n4->children[0];
1651 }
1652 
1653 /*
1654  * Workhorse for RT_SET
1655  *
1656  * "parent_slot" is the address of the child pointer we just followed,
1657  * in the parent's array of children, needed if inserting into the
1658  * current node causes it to grow.
1659  */
1660 static RT_PTR_ALLOC *
1661 RT_GET_SLOT_RECURSIVE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift, bool *found)
1662 {
1663  RT_PTR_ALLOC *slot;
1664  RT_CHILD_PTR node;
1665  uint8 chunk = RT_GET_KEY_CHUNK(key, shift);
1666 
1667  node.alloc = *parent_slot;
1668  RT_PTR_SET_LOCAL(tree, &node);
1669  slot = RT_NODE_SEARCH(node.local, chunk);
1670 
1671  if (slot == NULL)
1672  {
1673  *found = false;
1674 
1675  /* reserve slot for the caller to populate */
1676 
1677  slot = RT_NODE_INSERT(tree, parent_slot, node, chunk);
1678 
1679  if (shift == 0)
1680  return slot;
1681  else
1682  return RT_EXTEND_DOWN(tree, slot, key, shift);
1683  }
1684  else
1685  {
1686  if (shift == 0)
1687  {
1688  *found = true;
1689  return slot;
1690  }
1691  else
1692  return RT_GET_SLOT_RECURSIVE(tree, slot, key, shift - RT_SPAN, found);
1693  }
1694 }
1695 
1696 /*
1697  * Set key to value that value_p points to. If the entry already exists, we
1698  * update its value and return true. Returns false if entry doesn't yet exist.
1699  *
1700  * Taking a lock in exclusive mode is the caller's responsibility.
1701  */
1702 RT_SCOPE bool
1704 {
1705  bool found;
1706  RT_PTR_ALLOC *slot;
1707  size_t value_sz = RT_GET_VALUE_SIZE(value_p);
1708 
1709 #ifdef RT_SHMEM
1710  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1711 #endif
1712 
1713  Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
1714 
1715  /* Extend the tree if necessary */
1716  if (unlikely(key > tree->ctl->max_val))
1717  {
1718  if (tree->ctl->num_keys == 0)
1719  {
1720  RT_CHILD_PTR node;
1721  RT_NODE_4 *n4;
1723 
1724  /*
1725  * With an empty root node, we don't extend the tree upwards,
1726  * since that would result in orphan empty nodes. Instead we open
1727  * code inserting into the root node and extend downward from
1728  * there.
1729  */
1730  node.alloc = tree->ctl->root;
1731  RT_PTR_SET_LOCAL(tree, &node);
1732  n4 = (RT_NODE_4 *) node.local;
1733  n4->base.count = 1;
1735 
1736  slot = RT_EXTEND_DOWN(tree, &n4->children[0], key, start_shift);
1737  found = false;
1738  tree->ctl->start_shift = start_shift;
1739  tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(start_shift);
1740  goto have_slot;
1741  }
1742  else
1743  RT_EXTEND_UP(tree, key);
1744  }
1745 
1746  slot = RT_GET_SLOT_RECURSIVE(tree, &tree->ctl->root,
1747  key, tree->ctl->start_shift, &found);
1748 
1749 have_slot:
1750  Assert(slot != NULL);
1751 
1752  if (RT_VALUE_IS_EMBEDDABLE(value_p))
1753  {
1754  /* free the existing leaf */
1755  if (found && !RT_CHILDPTR_IS_VALUE(*slot))
1756  RT_FREE_LEAF(tree, *slot);
1757 
1758  /* store value directly in child pointer slot */
1759  memcpy(slot, value_p, value_sz);
1760 
1761 #ifdef RT_RUNTIME_EMBEDDABLE_VALUE
1762  /* tag child pointer */
1763 #ifdef RT_SHMEM
1764  *slot |= 1;
1765 #else
1766  *((uintptr_t *) slot) |= 1;
1767 #endif
1768 #endif
1769  }
1770  else
1771  {
1772  RT_CHILD_PTR leaf;
1773 
1774  if (found && !RT_CHILDPTR_IS_VALUE(*slot))
1775  {
1776  Assert(RT_PTR_ALLOC_IS_VALID(*slot));
1777  leaf.alloc = *slot;
1778  RT_PTR_SET_LOCAL(tree, &leaf);
1779 
1780  if (RT_GET_VALUE_SIZE((RT_VALUE_TYPE *) leaf.local) != value_sz)
1781  {
1782  /*
1783  * different sizes, so first free the existing leaf before
1784  * allocating a new one
1785  */
1786  RT_FREE_LEAF(tree, *slot);
1787  leaf = RT_ALLOC_LEAF(tree, value_sz);
1788  *slot = leaf.alloc;
1789  }
1790  }
1791  else
1792  {
1793  /* allocate new leaf and store it in the child array */
1794  leaf = RT_ALLOC_LEAF(tree, value_sz);
1795  *slot = leaf.alloc;
1796  }
1797 
1798  memcpy(leaf.local, value_p, value_sz);
1799  }
1800 
1801  /* Update the statistics */
1802  if (!found)
1803  tree->ctl->num_keys++;
1804 
1805  return found;
1806 }
1807 
1808 /***************** SETUP / TEARDOWN *****************/
1809 
1810 /*
1811  * Create the radix tree in the given memory context and return it.
1812  *
1813  * All local memory required for a radix tree is allocated in the given
1814  * memory context and its children. Note that RT_FREE() will delete all
1815  * allocated space within the given memory context, so the dsa_area should
1816  * be created in a different context.
1817  */
1819 #ifdef RT_SHMEM
1820 RT_CREATE(MemoryContext ctx, dsa_area *dsa, int tranche_id)
1821 #else
1823 #endif
1824 {
1828 #ifdef RT_SHMEM
1829  dsa_pointer dp;
1830 #endif
1831 
1833 
1835  tree->context = ctx;
1836 
1837  /*
1838  * Separate context for iteration in case the tree context doesn't support
1839  * pfree
1840  */
1841  tree->iter_context = AllocSetContextCreate(ctx,
1842  RT_STR(RT_PREFIX) "_radix_tree iter context",
1844 
1845 #ifdef RT_SHMEM
1846  tree->dsa = dsa;
1847  dp = dsa_allocate0(dsa, sizeof(RT_RADIX_TREE_CONTROL));
1848  tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, dp);
1849  tree->ctl->handle = dp;
1850  tree->ctl->magic = RT_RADIX_TREE_MAGIC;
1851  LWLockInitialize(&tree->ctl->lock, tranche_id);
1852 #else
1854 
1855  /* Create a slab context for each size class */
1856  for (int i = 0; i < RT_NUM_SIZE_CLASSES; i++)
1857  {
1858  RT_SIZE_CLASS_ELEM size_class = RT_SIZE_CLASS_INFO[i];
1859  size_t inner_blocksize = RT_SLAB_BLOCK_SIZE(size_class.allocsize);
1860 
1861  tree->node_slabs[i] = SlabContextCreate(ctx,
1862  size_class.name,
1863  inner_blocksize,
1864  size_class.allocsize);
1865  }
1866 
1867  /* By default we use the passed context for leaves. */
1868  tree->leaf_context = tree->context;
1869 
1870 #ifndef RT_VARLEN_VALUE_SIZE
1871 
1872  /*
1873  * For leaves storing fixed-length values, we use a slab context to avoid
1874  * the possibility of space wastage by power-of-2 rounding up.
1875  */
1876  if (sizeof(RT_VALUE_TYPE) > sizeof(RT_PTR_ALLOC))
1877  tree->leaf_context = SlabContextCreate(ctx,
1878  RT_STR(RT_PREFIX) "_radix_tree leaf context",
1880  sizeof(RT_VALUE_TYPE));
1881 #endif /* !RT_VARLEN_VALUE_SIZE */
1882 #endif /* RT_SHMEM */
1883 
1884  /* add root node now so that RT_SET can assume it exists */
1886  tree->ctl->root = rootnode.alloc;
1887  tree->ctl->start_shift = 0;
1888  tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(0);
1889 
1891 
1892  return tree;
1893 }
1894 
1895 #ifdef RT_SHMEM
1897 RT_ATTACH(dsa_area *dsa, RT_HANDLE handle)
1898 {
1900  dsa_pointer control;
1901 
1902  tree = (RT_RADIX_TREE *) palloc0(sizeof(RT_RADIX_TREE));
1903 
1904  /* Find the control object in shared memory */
1905  control = handle;
1906 
1907  tree->dsa = dsa;
1908  tree->ctl = (RT_RADIX_TREE_CONTROL *) dsa_get_address(dsa, control);
1909  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1910 
1911  return tree;
1912 }
1913 
1914 RT_SCOPE void
1915 RT_DETACH(RT_RADIX_TREE * tree)
1916 {
1917  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1918  pfree(tree);
1919 }
1920 
1921 RT_SCOPE RT_HANDLE
1922 RT_GET_HANDLE(RT_RADIX_TREE * tree)
1923 {
1924  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1925  return tree->ctl->handle;
1926 }
1927 
1928 RT_SCOPE void
1929 RT_LOCK_EXCLUSIVE(RT_RADIX_TREE * tree)
1930 {
1931  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1932  LWLockAcquire(&tree->ctl->lock, LW_EXCLUSIVE);
1933 }
1934 
1935 RT_SCOPE void
1936 RT_LOCK_SHARE(RT_RADIX_TREE * tree)
1937 {
1938  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1939  LWLockAcquire(&tree->ctl->lock, LW_SHARED);
1940 }
1941 
1942 RT_SCOPE void
1943 RT_UNLOCK(RT_RADIX_TREE * tree)
1944 {
1945  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
1946  LWLockRelease(&tree->ctl->lock);
1947 }
1948 
1949 /*
1950  * Recursively free all nodes allocated in the DSA area.
1951  */
1952 static void
1954 {
1955  RT_CHILD_PTR node;
1956 
1958 
1959  node.alloc = ptr;
1960  RT_PTR_SET_LOCAL(tree, &node);
1961 
1962  switch (node.local->kind)
1963  {
1964  case RT_NODE_KIND_4:
1965  {
1966  RT_NODE_4 *n4 = (RT_NODE_4 *) node.local;
1967 
1968  for (int i = 0; i < n4->base.count; i++)
1969  {
1970  RT_PTR_ALLOC child = n4->children[i];
1971 
1972  if (shift > 0)
1973  RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
1974  else if (!RT_CHILDPTR_IS_VALUE(child))
1975  dsa_free(tree->dsa, child);
1976  }
1977 
1978  break;
1979  }
1980  case RT_NODE_KIND_16:
1981  {
1982  RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
1983 
1984  for (int i = 0; i < n16->base.count; i++)
1985  {
1986  RT_PTR_ALLOC child = n16->children[i];
1987 
1988  if (shift > 0)
1989  RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
1990  else if (!RT_CHILDPTR_IS_VALUE(child))
1991  dsa_free(tree->dsa, child);
1992  }
1993 
1994  break;
1995  }
1996  case RT_NODE_KIND_48:
1997  {
1998  RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
1999 
2000  for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2001  {
2002  RT_PTR_ALLOC child;
2003 
2004  if (!RT_NODE_48_IS_CHUNK_USED(n48, i))
2005  continue;
2006 
2007  child = *RT_NODE_48_GET_CHILD(n48, i);
2008 
2009  if (shift > 0)
2010  RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
2011  else if (!RT_CHILDPTR_IS_VALUE(child))
2012  dsa_free(tree->dsa, child);
2013  }
2014 
2015  break;
2016  }
2017  case RT_NODE_KIND_256:
2018  {
2019  RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2020 
2021  for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2022  {
2023  RT_PTR_ALLOC child;
2024 
2025  if (!RT_NODE_256_IS_CHUNK_USED(n256, i))
2026  continue;
2027 
2028  child = *RT_NODE_256_GET_CHILD(n256, i);
2029 
2030  if (shift > 0)
2031  RT_FREE_RECURSE(tree, child, shift - RT_SPAN);
2032  else if (!RT_CHILDPTR_IS_VALUE(child))
2033  dsa_free(tree->dsa, child);
2034  }
2035 
2036  break;
2037  }
2038  }
2039 
2040  /* Free the inner node */
2041  dsa_free(tree->dsa, ptr);
2042 }
2043 #endif
2044 
2045 /*
2046  * Free the radix tree, including all nodes and leaves.
2047  */
2048 RT_SCOPE void
2050 {
2051 #ifdef RT_SHMEM
2052  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2053 
2054  /* Free all memory used for radix tree nodes */
2055  Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2056  RT_FREE_RECURSE(tree, tree->ctl->root, tree->ctl->start_shift);
2057 
2058  /*
2059  * Vandalize the control block to help catch programming error where other
2060  * backends access the memory formerly occupied by this radix tree.
2061  */
2062  tree->ctl->magic = 0;
2063  dsa_free(tree->dsa, tree->ctl->handle);
2064 #endif
2065 
2066  /*
2067  * Free all space allocated within the tree's context and delete all child
2068  * contexts such as those used for nodes.
2069  */
2070  MemoryContextReset(tree->context);
2071 }
2072 
2073 /***************** ITERATION *****************/
2074 
2075 /*
2076  * Create and return the iterator for the given radix tree.
2077  *
2078  * Taking a lock in shared mode during the iteration is the caller's
2079  * responsibility.
2080  */
2081 RT_SCOPE RT_ITER *
2083 {
2084  RT_ITER *iter;
2086 
2087  iter = (RT_ITER *) MemoryContextAllocZero(tree->iter_context,
2088  sizeof(RT_ITER));
2089  iter->tree = tree;
2090 
2091  Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2092  root.alloc = iter->tree->ctl->root;
2094 
2095  iter->top_level = iter->tree->ctl->start_shift / RT_SPAN;
2096 
2097  /* Set the root to start */
2098  iter->cur_level = iter->top_level;
2099  iter->node_iters[iter->cur_level].node = root;
2100  iter->node_iters[iter->cur_level].idx = 0;
2101 
2102  return iter;
2103 }
2104 
2105 /*
2106  * Scan the inner node and return the next child pointer if one exists, otherwise
2107  * return NULL.
2108  */
2109 static inline RT_PTR_ALLOC *
2110 RT_NODE_ITERATE_NEXT(RT_ITER * iter, int level)
2111 {
2112  uint8 key_chunk = 0;
2113  RT_NODE_ITER *node_iter;
2114  RT_CHILD_PTR node;
2115  RT_PTR_ALLOC *slot = NULL;
2116 
2117 #ifdef RT_SHMEM
2118  Assert(iter->tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2119 #endif
2120 
2121  node_iter = &(iter->node_iters[level]);
2122  node = node_iter->node;
2123 
2124  Assert(node.local != NULL);
2125 
2126  switch ((node.local)->kind)
2127  {
2128  case RT_NODE_KIND_4:
2129  {
2130  RT_NODE_4 *n4 = (RT_NODE_4 *) (node.local);
2131 
2132  if (node_iter->idx >= n4->base.count)
2133  return NULL;
2134 
2135  slot = &n4->children[node_iter->idx];
2136  key_chunk = n4->chunks[node_iter->idx];
2137  node_iter->idx++;
2138  break;
2139  }
2140  case RT_NODE_KIND_16:
2141  {
2142  RT_NODE_16 *n16 = (RT_NODE_16 *) (node.local);
2143 
2144  if (node_iter->idx >= n16->base.count)
2145  return NULL;
2146 
2147  slot = &n16->children[node_iter->idx];
2148  key_chunk = n16->chunks[node_iter->idx];
2149  node_iter->idx++;
2150  break;
2151  }
2152  case RT_NODE_KIND_48:
2153  {
2154  RT_NODE_48 *n48 = (RT_NODE_48 *) (node.local);
2155  int chunk;
2156 
2157  for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++)
2158  {
2159  if (RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2160  break;
2161  }
2162 
2163  if (chunk >= RT_NODE_MAX_SLOTS)
2164  return NULL;
2165 
2166  slot = RT_NODE_48_GET_CHILD(n48, chunk);
2167 
2168  key_chunk = chunk;
2169  node_iter->idx = chunk + 1;
2170  break;
2171  }
2172  case RT_NODE_KIND_256:
2173  {
2174  RT_NODE_256 *n256 = (RT_NODE_256 *) (node.local);
2175  int chunk;
2176 
2177  for (chunk = node_iter->idx; chunk < RT_NODE_MAX_SLOTS; chunk++)
2178  {
2179  if (RT_NODE_256_IS_CHUNK_USED(n256, chunk))
2180  break;
2181  }
2182 
2183  if (chunk >= RT_NODE_MAX_SLOTS)
2184  return NULL;
2185 
2186  slot = RT_NODE_256_GET_CHILD(n256, chunk);
2187 
2188  key_chunk = chunk;
2189  node_iter->idx = chunk + 1;
2190  break;
2191  }
2192  }
2193 
2194  /* Update the key */
2195  iter->key &= ~(((uint64) RT_CHUNK_MASK) << (level * RT_SPAN));
2196  iter->key |= (((uint64) key_chunk) << (level * RT_SPAN));
2197 
2198  return slot;
2199 }
2200 
2201 /*
2202  * Return pointer to value and set key_p as long as there is a key. Otherwise
2203  * return NULL.
2204  */
2206 RT_ITERATE_NEXT(RT_ITER * iter, uint64 *key_p)
2207 {
2208  RT_PTR_ALLOC *slot = NULL;
2209 
2210  while (iter->cur_level <= iter->top_level)
2211  {
2212  RT_CHILD_PTR node;
2213 
2214  slot = RT_NODE_ITERATE_NEXT(iter, iter->cur_level);
2215 
2216  if (iter->cur_level == 0 && slot != NULL)
2217  {
2218  /* Found a value at the leaf node */
2219  *key_p = iter->key;
2220  node.alloc = *slot;
2221 
2222  if (RT_CHILDPTR_IS_VALUE(*slot))
2223  return (RT_VALUE_TYPE *) slot;
2224  else
2225  {
2226  RT_PTR_SET_LOCAL(iter->tree, &node);
2227  return (RT_VALUE_TYPE *) node.local;
2228  }
2229  }
2230 
2231  if (slot != NULL)
2232  {
2233  /* Found the child slot, move down the tree */
2234  node.alloc = *slot;
2235  RT_PTR_SET_LOCAL(iter->tree, &node);
2236 
2237  iter->cur_level--;
2238  iter->node_iters[iter->cur_level].node = node;
2239  iter->node_iters[iter->cur_level].idx = 0;
2240  }
2241  else
2242  {
2243  /* Not found the child slot, move up the tree */
2244  iter->cur_level++;
2245  }
2246  }
2247 
2248  /* We've visited all nodes, so the iteration finished */
2249  return NULL;
2250 }
2251 
2252 /*
2253  * Terminate the iteration. The caller is responsible for releasing any locks.
2254  */
2255 RT_SCOPE void
2257 {
2258  pfree(iter);
2259 }
2260 
2261 /***************** DELETION *****************/
2262 
2263 #ifdef RT_USE_DELETE
2264 
2265 /* Delete the element at "deletepos" */
2266 static inline void
2267 RT_SHIFT_ARRAYS_AND_DELETE(uint8 *chunks, RT_PTR_ALLOC * children, int count, int deletepos)
2268 {
2269  /*
2270  * This is basically a memmove, but written in a simple loop for speed on
2271  * small inputs.
2272  */
2273  for (int i = deletepos; i < count - 1; i++)
2274  {
2275  /* workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101481 */
2276 #ifdef __GNUC__
2277  __asm__("");
2278 #endif
2279  chunks[i] = chunks[i + 1];
2280  children[i] = children[i + 1];
2281  }
2282 }
2283 
2284 /*
2285  * Copy both chunk and slot arrays into the right
2286  * place. The element at "deletepos" is deleted by skipping it.
2287  */
2288 static inline void
2289 RT_COPY_ARRAYS_AND_DELETE(uint8 *dst_chunks, RT_PTR_ALLOC * dst_children,
2290  uint8 *src_chunks, RT_PTR_ALLOC * src_children,
2291  int count, int deletepos)
2292 {
2293  for (int i = 0; i < count - 1; i++)
2294  {
2295  /*
2296  * use a branch-free computation to skip the index of the deleted
2297  * element
2298  */
2299  int sourceidx = i + (i >= deletepos);
2300  int destidx = i;
2301 
2302  dst_chunks[destidx] = src_chunks[sourceidx];
2303  dst_children[destidx] = src_children[sourceidx];
2304  }
2305 }
2306 
2307 /*
2308  * Note: While all node-growing functions are called to perform an insertion
2309  * when no more space is available, shrinking is not a hard-and-fast requirement.
2310  * When shrinking nodes, we generally wait until the count is about 3/4 of
2311  * the next lower node's fanout. This prevents ping-ponging between different
2312  * node sizes.
2313  *
2314  * Some shrinking functions delete first and then shrink, either because we
2315  * must or because it's fast and simple that way. Sometimes it's faster to
2316  * delete while shrinking.
2317  */
2318 
2319 /*
2320  * Move contents of a node256 to a node48. Any deletion should have happened
2321  * in the caller.
2322  */
2323 static void pg_noinline
2325 {
2326  RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2327  RT_CHILD_PTR newnode;
2328  RT_NODE_48 *new48;
2329  int slot_idx = 0;
2330 
2331  /* initialize new node */
2333  new48 = (RT_NODE_48 *) newnode.local;
2334 
2335  /* copy over the entries */
2336  RT_COPY_COMMON(newnode, node);
2337  for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2338  {
2339  if (RT_NODE_256_IS_CHUNK_USED(n256, i))
2340  {
2341  new48->slot_idxs[i] = slot_idx;
2342  new48->children[slot_idx] = n256->children[i];
2343  slot_idx++;
2344  }
2345  }
2346 
2347  /*
2348  * Since we just copied a dense array, we can fill "isset" using a single
2349  * store, provided the length of that array is at most the number of bits
2350  * in a bitmapword.
2351  */
2353  new48->isset[0] = (bitmapword) (((uint64) 1 << n256->base.count) - 1);
2354 
2355  /* free old node and update reference in parent */
2356  *parent_slot = newnode.alloc;
2357  RT_FREE_NODE(tree, node);
2358 }
2359 
2360 static inline void
2362 {
2363  int shrink_threshold;
2364  RT_NODE_256 *n256 = (RT_NODE_256 *) node.local;
2365  int idx = RT_BM_IDX(chunk);
2366  int bitnum = RT_BM_BIT(chunk);
2367 
2368  /* Mark the slot free for "chunk" */
2369  n256->isset[idx] &= ~((bitmapword) 1 << bitnum);
2370 
2371  n256->base.count--;
2372 
2373  /*
2374  * A full node256 will have a count of zero because of overflow, so we
2375  * delete first before checking the shrink threshold.
2376  */
2377  Assert(n256->base.count > 0);
2378 
2379  /* This simplifies RT_SHRINK_NODE_256() */
2380  shrink_threshold = BITS_PER_BITMAPWORD;
2381  shrink_threshold = Min(RT_FANOUT_48 / 4 * 3, shrink_threshold);
2382 
2383  if (n256->base.count <= shrink_threshold)
2384  RT_SHRINK_NODE_256(tree, parent_slot, node, chunk);
2385 }
2386 
2387 /*
2388  * Move contents of a node48 to a node16. Any deletion should have happened
2389  * in the caller.
2390  */
2391 static void pg_noinline
2393 {
2394  RT_NODE_48 *n48 = (RT_NODE_48 *) (node.local);
2395  RT_CHILD_PTR newnode;
2396  RT_NODE_16 *new16;
2397  int destidx = 0;
2398 
2399  /*
2400  * Initialize new node. For now we skip the larger node16 size class for
2401  * simplicity.
2402  */
2404  new16 = (RT_NODE_16 *) newnode.local;
2405 
2406  /* copy over all existing entries */
2407  RT_COPY_COMMON(newnode, node);
2408  for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2409  {
2410  if (n48->slot_idxs[chunk] != RT_INVALID_SLOT_IDX)
2411  {
2412  new16->chunks[destidx] = chunk;
2413  new16->children[destidx] = n48->children[n48->slot_idxs[chunk]];
2414  destidx++;
2415  }
2416  }
2417 
2418  Assert(destidx < new16->base.fanout);
2419 
2420  RT_VERIFY_NODE((RT_NODE *) new16);
2421 
2422  /* free old node and update reference in parent */
2423  *parent_slot = newnode.alloc;
2424  RT_FREE_NODE(tree, node);
2425 }
2426 
2427 static inline void
2429 {
2430  RT_NODE_48 *n48 = (RT_NODE_48 *) node.local;
2431  int deletepos = n48->slot_idxs[chunk];
2432 
2433  /* For now we skip the larger node16 size class for simplicity */
2434  int shrink_threshold = RT_FANOUT_16_LO / 4 * 3;
2435  int idx;
2436  int bitnum;
2437 
2438  Assert(deletepos != RT_INVALID_SLOT_IDX);
2439 
2440  idx = RT_BM_IDX(deletepos);
2441  bitnum = RT_BM_BIT(deletepos);
2442  n48->isset[idx] &= ~((bitmapword) 1 << bitnum);
2444 
2445  n48->base.count--;
2446 
2447  /*
2448  * To keep shrinking simple, do it after deleting, which is fast for
2449  * node48 anyway.
2450  */
2451  if (n48->base.count <= shrink_threshold)
2452  RT_SHRINK_NODE_48(tree, parent_slot, node, chunk);
2453 }
2454 
2455 /*
2456  * Move contents of a node16 to a node4, and delete the one at "deletepos".
2457  * By deleting as we move, we can avoid memmove operations in the new
2458  * node.
2459  */
2460 static void pg_noinline
2461 RT_SHRINK_NODE_16(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, RT_CHILD_PTR node, uint8 deletepos)
2462 {
2463  RT_NODE_16 *n16 = (RT_NODE_16 *) (node.local);
2464  RT_CHILD_PTR newnode;
2465  RT_NODE_4 *new4;
2466 
2467  /* initialize new node */
2469  new4 = (RT_NODE_4 *) newnode.local;
2470 
2471  /* copy over existing entries, except for the one at "deletepos" */
2472  RT_COPY_COMMON(newnode, node);
2474  n16->chunks, n16->children,
2475  n16->base.count, deletepos);
2476 
2477  new4->base.count--;
2478  RT_VERIFY_NODE((RT_NODE *) new4);
2479 
2480  /* free old node and update reference in parent */
2481  *parent_slot = newnode.alloc;
2482  RT_FREE_NODE(tree, node);
2483 }
2484 
2485 static inline void
2487 {
2488  RT_NODE_16 *n16 = (RT_NODE_16 *) node.local;
2489  int deletepos = slot - n16->children;
2490 
2491  /*
2492  * When shrinking to node4, 4 is hard-coded. After shrinking, the new node
2493  * will end up with 3 elements and 3 is the largest count where linear
2494  * search is faster than SIMD, at least on x86-64.
2495  */
2496  if (n16->base.count <= 4)
2497  {
2498  RT_SHRINK_NODE_16(tree, parent_slot, node, deletepos);
2499  return;
2500  }
2501 
2502  Assert(deletepos >= 0);
2503  Assert(n16->chunks[deletepos] == chunk);
2504 
2506  n16->base.count, deletepos);
2507  n16->base.count--;
2508 }
2509 
2510 static inline void
2512 {
2513  RT_NODE_4 *n4 = (RT_NODE_4 *) node.local;
2514 
2515  if (n4->base.count == 1)
2516  {
2517  Assert(n4->chunks[0] == chunk);
2518 
2519  /*
2520  * If we're deleting the last entry from the root child node don't
2521  * free it, but mark both the tree and the root child node empty. That
2522  * way, RT_SET can assume it exists.
2523  */
2524  if (parent_slot == &tree->ctl->root)
2525  {
2526  n4->base.count = 0;
2527  tree->ctl->start_shift = 0;
2528  tree->ctl->max_val = RT_SHIFT_GET_MAX_VAL(0);
2529  }
2530  else
2531  {
2532  /*
2533  * Deleting last entry, so just free the entire node.
2534  * RT_DELETE_RECURSIVE has already freed the value and lower-level
2535  * children.
2536  */
2537  RT_FREE_NODE(tree, node);
2538 
2539  /*
2540  * Also null out the parent's slot -- this tells the next higher
2541  * level to delete its child pointer
2542  */
2543  *parent_slot = RT_INVALID_PTR_ALLOC;
2544  }
2545  }
2546  else
2547  {
2548  int deletepos = slot - n4->children;
2549 
2550  Assert(deletepos >= 0);
2551  Assert(n4->chunks[deletepos] == chunk);
2552 
2554  n4->base.count, deletepos);
2555 
2556  n4->base.count--;
2557  }
2558 }
2559 
2560 /*
2561  * Delete the child pointer corresponding to "key" in the given node.
2562  */
2563 static inline void
2565 {
2566  switch ((node.local)->kind)
2567  {
2568  case RT_NODE_KIND_4:
2569  {
2570  RT_REMOVE_CHILD_4(tree, parent_slot, node, chunk, slot);
2571  return;
2572  }
2573  case RT_NODE_KIND_16:
2574  {
2575  RT_REMOVE_CHILD_16(tree, parent_slot, node, chunk, slot);
2576  return;
2577  }
2578  case RT_NODE_KIND_48:
2579  {
2580  RT_REMOVE_CHILD_48(tree, parent_slot, node, chunk);
2581  return;
2582  }
2583  case RT_NODE_KIND_256:
2584  {
2585  RT_REMOVE_CHILD_256(tree, parent_slot, node, chunk);
2586  return;
2587  }
2588  default:
2589  pg_unreachable();
2590  }
2591 }
2592 
2593 /* workhorse for RT_DELETE */
2594 static bool
2595 RT_DELETE_RECURSIVE(RT_RADIX_TREE * tree, RT_PTR_ALLOC * parent_slot, uint64 key, int shift)
2596 {
2597  RT_PTR_ALLOC *slot;
2598  RT_CHILD_PTR node;
2599  uint8 chunk = RT_GET_KEY_CHUNK(key, shift);
2600 
2601  node.alloc = *parent_slot;
2602  RT_PTR_SET_LOCAL(tree, &node);
2603  slot = RT_NODE_SEARCH(node.local, chunk);
2604 
2605  if (slot == NULL)
2606  return false;
2607 
2608  if (shift == 0)
2609  {
2610  if (!RT_CHILDPTR_IS_VALUE(*slot))
2611  RT_FREE_LEAF(tree, *slot);
2612 
2613  RT_NODE_DELETE(tree, parent_slot, node, chunk, slot);
2614  return true;
2615  }
2616  else
2617  {
2618  bool deleted;
2619 
2620  deleted = RT_DELETE_RECURSIVE(tree, slot, key, shift - RT_SPAN);
2621 
2622  /* Child node was freed, so delete its slot now */
2623  if (*slot == RT_INVALID_PTR_ALLOC)
2624  {
2625  Assert(deleted);
2626  RT_NODE_DELETE(tree, parent_slot, node, chunk, slot);
2627  }
2628 
2629  return deleted;
2630  }
2631 }
2632 
2633 /*
2634  * Delete the given key from the radix tree. If the key is found delete it
2635  * and return true, otherwise do nothing and return false.
2636  *
2637  * Taking a lock in exclusive mode is the caller's responsibility.
2638  */
2639 RT_SCOPE bool
2640 RT_DELETE(RT_RADIX_TREE * tree, uint64 key)
2641 {
2642  bool deleted;
2643 
2644 #ifdef RT_SHMEM
2645  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2646 #endif
2647 
2648  if (key > tree->ctl->max_val)
2649  return false;
2650 
2651  Assert(RT_PTR_ALLOC_IS_VALID(tree->ctl->root));
2652  deleted = RT_DELETE_RECURSIVE(tree, &tree->ctl->root,
2653  key, tree->ctl->start_shift);
2654 
2655  /* Found the key to delete. Update the statistics */
2656  if (deleted)
2657  {
2658  tree->ctl->num_keys--;
2659  Assert(tree->ctl->num_keys >= 0);
2660  }
2661 
2662  return deleted;
2663 }
2664 
2665 #endif /* RT_USE_DELETE */
2666 
2667 /***************** UTILITY FUNCTIONS *****************/
2668 
2669 /*
2670  * Return the statistics of the amount of memory used by the radix tree.
2671  *
2672  * Since dsa_get_total_size() does appropriate locking, the caller doesn't
2673  * need to take a lock.
2674  */
2675 RT_SCOPE uint64
2677 {
2678  size_t total = 0;
2679 
2680 #ifdef RT_SHMEM
2681  Assert(tree->ctl->magic == RT_RADIX_TREE_MAGIC);
2682  total = dsa_get_total_size(tree->dsa);
2683 #else
2684  total = MemoryContextMemAllocated(tree->context, true);
2685 #endif
2686 
2687  return total;
2688 }
2689 
2690 /*
2691  * Perform some sanity checks on the given node.
2692  */
2693 static void
2695 {
2696 #ifdef USE_ASSERT_CHECKING
2697 
2698  switch (node->kind)
2699  {
2700  case RT_NODE_KIND_4:
2701  {
2702  RT_NODE_4 *n4 = (RT_NODE_4 *) node;
2703 
2704  /* RT_DUMP_NODE(node); */
2705 
2706  for (int i = 1; i < n4->base.count; i++)
2707  Assert(n4->chunks[i - 1] < n4->chunks[i]);
2708 
2709  break;
2710  }
2711  case RT_NODE_KIND_16:
2712  {
2713  RT_NODE_16 *n16 = (RT_NODE_16 *) node;
2714 
2715  /* RT_DUMP_NODE(node); */
2716 
2717  for (int i = 1; i < n16->base.count; i++)
2718  Assert(n16->chunks[i - 1] < n16->chunks[i]);
2719 
2720  break;
2721  }
2722  case RT_NODE_KIND_48:
2723  {
2724  RT_NODE_48 *n48 = (RT_NODE_48 *) node;
2725  int cnt = 0;
2726 
2727  /* RT_DUMP_NODE(node); */
2728 
2729  for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
2730  {
2731  uint8 slot = n48->slot_idxs[i];
2732  int idx = RT_BM_IDX(slot);
2733  int bitnum = RT_BM_BIT(slot);
2734 
2735  if (!RT_NODE_48_IS_CHUNK_USED(n48, i))
2736  continue;
2737 
2738  /* Check if the corresponding slot is used */
2739  Assert(slot < node->fanout);
2740  Assert((n48->isset[idx] & ((bitmapword) 1 << bitnum)) != 0);
2741 
2742  cnt++;
2743  }
2744 
2745  Assert(n48->base.count == cnt);
2746 
2747  break;
2748  }
2749  case RT_NODE_KIND_256:
2750  {
2751  RT_NODE_256 *n256 = (RT_NODE_256 *) node;
2752  int cnt = 0;
2753 
2754  /* RT_DUMP_NODE(node); */
2755 
2756  for (int i = 0; i < RT_BM_IDX(RT_NODE_MAX_SLOTS); i++)
2757  cnt += bmw_popcount(n256->isset[i]);
2758 
2759  /*
2760  * Check if the number of used chunk matches, accounting for
2761  * overflow
2762  */
2763  if (cnt == RT_FANOUT_256)
2764  Assert(n256->base.count == 0);
2765  else
2766  Assert(n256->base.count == cnt);
2767 
2768  break;
2769  }
2770  }
2771 #endif
2772 }
2773 
2774 /***************** DEBUG FUNCTIONS *****************/
2775 
2776 #ifdef RT_DEBUG
2777 
2778 /*
2779  * Print out tree stats, some of which are only collected in debugging builds.
2780  */
2781 RT_SCOPE void
2783 {
2784  fprintf(stderr, "max_val = " UINT64_FORMAT "\n", tree->ctl->max_val);
2785  fprintf(stderr, "num_keys = %lld\n", (long long) tree->ctl->num_keys);
2786 
2787 #ifdef RT_SHMEM
2788  fprintf(stderr, "handle = " DSA_POINTER_FORMAT "\n", tree->ctl->handle);
2789 #endif
2790 
2791  fprintf(stderr, "height = %d", tree->ctl->start_shift / RT_SPAN);
2792 
2793  for (int i = 0; i < RT_NUM_SIZE_CLASSES; i++)
2794  {
2795  RT_SIZE_CLASS_ELEM size_class = RT_SIZE_CLASS_INFO[i];
2796 
2797  fprintf(stderr, ", n%d = %lld", size_class.fanout, (long long) tree->ctl->num_nodes[i]);
2798  }
2799 
2800  fprintf(stderr, ", leaves = %lld", (long long) tree->ctl->num_leaves);
2801 
2802  fprintf(stderr, "\n");
2803 }
2804 
2805 /*
2806  * Print out debugging information about the given node.
2807  */
2808 static void
2810 RT_DUMP_NODE(RT_NODE * node)
2811 {
2812 #ifdef RT_SHMEM
2813 #define RT_CHILD_PTR_FORMAT DSA_POINTER_FORMAT
2814 #else
2815 #define RT_CHILD_PTR_FORMAT "%p"
2816 #endif
2817 
2818  fprintf(stderr, "kind %d, fanout %d, count %u\n",
2819  (node->kind == RT_NODE_KIND_4) ? 4 :
2820  (node->kind == RT_NODE_KIND_16) ? 16 :
2821  (node->kind == RT_NODE_KIND_48) ? 48 : 256,
2822  node->fanout == 0 ? 256 : node->fanout,
2823  node->count == 0 ? 256 : node->count);
2824 
2825  switch (node->kind)
2826  {
2827  case RT_NODE_KIND_4:
2828  {
2829  RT_NODE_4 *n4 = (RT_NODE_4 *) node;
2830 
2831  fprintf(stderr, "chunks and slots:\n");
2832  for (int i = 0; i < n4->base.count; i++)
2833  {
2834  fprintf(stderr, " [%d] chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2835  i, n4->chunks[i], n4->children[i]);
2836  }
2837 
2838  break;
2839  }
2840  case RT_NODE_KIND_16:
2841  {
2842  RT_NODE_16 *n16 = (RT_NODE_16 *) node;
2843 
2844  fprintf(stderr, "chunks and slots:\n");
2845  for (int i = 0; i < n16->base.count; i++)
2846  {
2847  fprintf(stderr, " [%d] chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2848  i, n16->chunks[i], n16->children[i]);
2849  }
2850  break;
2851  }
2852  case RT_NODE_KIND_48:
2853  {
2854  RT_NODE_48 *n48 = (RT_NODE_48 *) node;
2855  char *sep = "";
2856 
2857  fprintf(stderr, "slot_idxs: \n");
2858  for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2859  {
2860  if (!RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2861  continue;
2862 
2863  fprintf(stderr, " idx[%d] = %d\n",
2864  chunk, n48->slot_idxs[chunk]);
2865  }
2866 
2867  fprintf(stderr, "isset-bitmap: ");
2868  for (int i = 0; i < (RT_FANOUT_48_MAX / BITS_PER_BYTE); i++)
2869  {
2870  fprintf(stderr, "%s%x", sep, ((uint8 *) n48->isset)[i]);
2871  sep = " ";
2872  }
2873  fprintf(stderr, "\n");
2874 
2875  fprintf(stderr, "chunks and slots:\n");
2876  for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2877  {
2878  if (!RT_NODE_48_IS_CHUNK_USED(n48, chunk))
2879  continue;
2880 
2881  fprintf(stderr, " chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2882  chunk,
2883  *RT_NODE_48_GET_CHILD(n48, chunk));
2884  }
2885  break;
2886  }
2887  case RT_NODE_KIND_256:
2888  {
2889  RT_NODE_256 *n256 = (RT_NODE_256 *) node;
2890  char *sep = "";
2891 
2892  fprintf(stderr, "isset-bitmap: ");
2893  for (int i = 0; i < (RT_FANOUT_256 / BITS_PER_BYTE); i++)
2894  {
2895  fprintf(stderr, "%s%x", sep, ((uint8 *) n256->isset)[i]);
2896  sep = " ";
2897  }
2898  fprintf(stderr, "\n");
2899 
2900  fprintf(stderr, "chunks and slots:\n");
2901  for (int chunk = 0; chunk < RT_NODE_MAX_SLOTS; chunk++)
2902  {
2903  if (!RT_NODE_256_IS_CHUNK_USED(n256, chunk))
2904  continue;
2905 
2906  fprintf(stderr, " chunk %x slot " RT_CHILD_PTR_FORMAT "\n",
2907  chunk,
2908  *RT_NODE_256_GET_CHILD(n256, chunk));
2909  }
2910  break;
2911  }
2912  }
2913 }
2914 #endif /* RT_DEBUG */
2915 
2916 #endif /* RT_DEFINE */
2917 
2918 
2919 /* undefine external parameters, so next radix tree can be defined */
2920 #undef RT_PREFIX
2921 #undef RT_SCOPE
2922 #undef RT_DECLARE
2923 #undef RT_DEFINE
2924 #undef RT_VALUE_TYPE
2925 #undef RT_VARLEN_VALUE_SIZE
2926 #undef RT_RUNTIME_EMBEDDABLE_VALUE
2927 #undef RT_SHMEM
2928 #undef RT_USE_DELETE
2929 #undef RT_DEBUG
2930 
2931 /* locally declared macros */
2932 #undef RT_MAKE_PREFIX
2933 #undef RT_MAKE_NAME
2934 #undef RT_MAKE_NAME_
2935 #undef RT_STR
2936 #undef RT_STR_
2937 #undef RT_SPAN
2938 #undef RT_NODE_MAX_SLOTS
2939 #undef RT_CHUNK_MASK
2940 #undef RT_MAX_SHIFT
2941 #undef RT_MAX_LEVEL
2942 #undef RT_GET_KEY_CHUNK
2943 #undef RT_BM_IDX
2944 #undef RT_BM_BIT
2945 #undef RT_NODE_MUST_GROW
2946 #undef RT_NODE_KIND_COUNT
2947 #undef RT_NUM_SIZE_CLASSES
2948 #undef RT_INVALID_SLOT_IDX
2949 #undef RT_SLAB_BLOCK_SIZE
2950 #undef RT_RADIX_TREE_MAGIC
2951 #undef RT_CHILD_PTR_FORMAT
2952 
2953 /* type declarations */
2954 #undef RT_RADIX_TREE
2955 #undef RT_RADIX_TREE_CONTROL
2956 #undef RT_CHILD_PTR
2957 #undef RT_PTR_ALLOC
2958 #undef RT_INVALID_PTR_ALLOC
2959 #undef RT_HANDLE
2960 #undef RT_ITER
2961 #undef RT_NODE
2962 #undef RT_NODE_ITER
2963 #undef RT_NODE_KIND_4
2964 #undef RT_NODE_KIND_16
2965 #undef RT_NODE_KIND_48
2966 #undef RT_NODE_KIND_256
2967 #undef RT_NODE_4
2968 #undef RT_NODE_16
2969 #undef RT_NODE_48
2970 #undef RT_NODE_256
2971 #undef RT_SIZE_CLASS
2972 #undef RT_SIZE_CLASS_ELEM
2973 #undef RT_SIZE_CLASS_INFO
2974 #undef RT_CLASS_4
2975 #undef RT_CLASS_16_LO
2976 #undef RT_CLASS_16_HI
2977 #undef RT_CLASS_48
2978 #undef RT_CLASS_256
2979 #undef RT_FANOUT_4
2980 #undef RT_FANOUT_4_MAX
2981 #undef RT_FANOUT_16_LO
2982 #undef RT_FANOUT_16_HI
2983 #undef RT_FANOUT_16_MAX
2984 #undef RT_FANOUT_48
2985 #undef RT_FANOUT_48_MAX
2986 #undef RT_FANOUT_256
2987 
2988 /* function declarations */
2989 #undef RT_CREATE
2990 #undef RT_FREE
2991 #undef RT_ATTACH
2992 #undef RT_DETACH
2993 #undef RT_LOCK_EXCLUSIVE
2994 #undef RT_LOCK_SHARE
2995 #undef RT_UNLOCK
2996 #undef RT_GET_HANDLE
2997 #undef RT_FIND
2998 #undef RT_SET
2999 #undef RT_BEGIN_ITERATE
3000 #undef RT_ITERATE_NEXT
3001 #undef RT_END_ITERATE
3002 #undef RT_DELETE
3003 #undef RT_MEMORY_USAGE
3004 #undef RT_DUMP_NODE
3005 #undef RT_STATS
3006 
3007 /* internal helper functions */
3008 #undef RT_GET_VALUE_SIZE
3009 #undef RT_VALUE_IS_EMBEDDABLE
3010 #undef RT_CHILDPTR_IS_VALUE
3011 #undef RT_GET_SLOT_RECURSIVE
3012 #undef RT_DELETE_RECURSIVE
3013 #undef RT_ALLOC_NODE
3014 #undef RT_ALLOC_LEAF
3015 #undef RT_FREE_NODE
3016 #undef RT_FREE_LEAF
3017 #undef RT_FREE_RECURSE
3018 #undef RT_EXTEND_UP
3019 #undef RT_EXTEND_DOWN
3020 #undef RT_COPY_COMMON
3021 #undef RT_PTR_SET_LOCAL
3022 #undef RT_PTR_ALLOC_IS_VALID
3023 #undef RT_NODE_16_SEARCH_EQ
3024 #undef RT_NODE_4_GET_INSERTPOS
3025 #undef RT_NODE_16_GET_INSERTPOS
3026 #undef RT_SHIFT_ARRAYS_FOR_INSERT
3027 #undef RT_SHIFT_ARRAYS_AND_DELETE
3028 #undef RT_COPY_ARRAYS_FOR_INSERT
3029 #undef RT_COPY_ARRAYS_AND_DELETE
3030 #undef RT_NODE_48_IS_CHUNK_USED
3031 #undef RT_NODE_48_GET_CHILD
3032 #undef RT_NODE_256_IS_CHUNK_USED
3033 #undef RT_NODE_256_GET_CHILD
3034 #undef RT_KEY_GET_SHIFT
3035 #undef RT_SHIFT_GET_MAX_VAL
3036 #undef RT_NODE_SEARCH
3037 #undef RT_ADD_CHILD_4
3038 #undef RT_ADD_CHILD_16
3039 #undef RT_ADD_CHILD_48
3040 #undef RT_ADD_CHILD_256
3041 #undef RT_GROW_NODE_4
3042 #undef RT_GROW_NODE_16
3043 #undef RT_GROW_NODE_48
3044 #undef RT_REMOVE_CHILD_4
3045 #undef RT_REMOVE_CHILD_16
3046 #undef RT_REMOVE_CHILD_48
3047 #undef RT_REMOVE_CHILD_256
3048 #undef RT_SHRINK_NODE_16
3049 #undef RT_SHRINK_NODE_48
3050 #undef RT_SHRINK_NODE_256
3051 #undef RT_NODE_DELETE
3052 #undef RT_NODE_INSERT
3053 #undef RT_NODE_ITERATE_NEXT
3054 #undef RT_VERIFY_NODE
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define bmw_rightmost_one_pos(w)
Definition: bitmapset.h:79
uint32 bitmapword
Definition: bitmapset.h:44
#define BITS_PER_BITMAPWORD
Definition: bitmapset.h:43
#define bmw_popcount(w)
Definition: bitmapset.h:80
unsigned int uint32
Definition: c.h:506
#define pg_noinline
Definition: c.h:250
#define Min(x, y)
Definition: c.h:1004
#define pg_attribute_unused()
Definition: c.h:123
#define Assert(condition)
Definition: c.h:858
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:398
#define UINT64_FORMAT
Definition: c.h:549
#define pg_unreachable()
Definition: c.h:296
#define unlikely(x)
Definition: c.h:311
unsigned char uint8
Definition: c.h:504
size_t dsa_get_total_size(dsa_area *area)
Definition: dsa.c:1027
void * dsa_get_address(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:942
void dsa_free(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:826
#define dsa_allocate0(area, size)
Definition: dsa.h:113
uint64 dsa_pointer
Definition: dsa.h:62
#define DSA_POINTER_FORMAT
Definition: dsa.h:69
#define dsa_allocate(area, size)
Definition: dsa.h:109
uint64 chunk
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:707
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1215
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
Size MemoryContextMemAllocated(MemoryContext context, bool recurse)
Definition: mcxt.c:762
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:170
static int pg_rightmost_one_pos32(uint32 word)
Definition: pg_bitutils.h:111
static int pg_leftmost_one_pos64(uint64 word)
Definition: pg_bitutils.h:72
#define BITS_PER_BYTE
#define fprintf
Definition: port.h:242
void check_stack_depth(void)
Definition: postgres.c:3540
#define RT_FIND
Definition: radixtree.h:175
#define RT_ADD_CHILD_48
Definition: radixtree.h:227
#define RT_NODE_KIND_16
Definition: radixtree.h:359
#define RT_NODE_ITERATE_NEXT
Definition: radixtree.h:239
#define RT_SPAN
Definition: radixtree.h:312
#define RT_CHILD_PTR
Definition: radixtree.h:250
#define RT_STATS
Definition: radixtree.h:193
#define RT_NODE_4_GET_INSERTPOS
Definition: radixtree.h:210
#define RT_NODE_ITER
Definition: radixtree.h:251
#define RT_ALLOC_LEAF
Definition: radixtree.h:201
#define RT_NODE_SEARCH
Definition: radixtree.h:222
#define RT_INVALID_SLOT_IDX
Definition: radixtree.h:555
#define RT_CHILDPTR_IS_VALUE
Definition: radixtree.h:196
#define RT_REMOVE_CHILD_16
Definition: radixtree.h:233
#define RT_VALUE_IS_EMBEDDABLE
Definition: radixtree.h:197
#define RT_ALLOC_NODE
Definition: radixtree.h:200
RT_CHILD_PTR rootnode
Definition: radixtree.h:1827
#define RT_FREE_RECURSE
Definition: radixtree.h:204
#define RT_NODE_16_GET_INSERTPOS
Definition: radixtree.h:211
#define RT_NODE_48_GET_CHILD
Definition: radixtree.h:217
#define RT_ITERATE_NEXT
Definition: radixtree.h:186
tree ctl start_shift
Definition: radixtree.h:1887
#define RT_END_ITERATE
Definition: radixtree.h:187
#define RT_BEGIN_ITERATE
Definition: radixtree.h:185
tree
Definition: radixtree.h:1834
#define RT_GROW_NODE_4
Definition: radixtree.h:229
#define RT_NODE_MAX_SLOTS
Definition: radixtree.h:318
RT_SCOPE RT_RADIX_TREE *MemoryContext old_ctx
Definition: radixtree.h:1824
#define RT_PTR_SET_LOCAL
Definition: radixtree.h:208
#define RT_COPY_ARRAYS_FOR_INSERT
Definition: radixtree.h:214
#define RT_DELETE_RECURSIVE
Definition: radixtree.h:199
#define RT_STR(s)
Definition: radixtree.h:169
#define RT_GET_SLOT_RECURSIVE
Definition: radixtree.h:198
for(int i=0;i< RT_NUM_SIZE_CLASSES;i++)
Definition: radixtree.h:1856
#define RT_SHRINK_NODE_256
Definition: radixtree.h:238
#define RT_SHRINK_NODE_48
Definition: radixtree.h:237
#define RT_EXTEND_DOWN
Definition: radixtree.h:206
#define RT_ADD_CHILD_16
Definition: radixtree.h:226
#define RT_SIZE_CLASS_ELEM
Definition: radixtree.h:257
#define RT_NODE_KIND_48
Definition: radixtree.h:360
MemoryContextSwitchTo(old_ctx)
#define RT_NODE_KIND_256
Definition: radixtree.h:361
#define RT_NODE
Definition: radixtree.h:249
#define RT_CREATE
Definition: radixtree.h:173
#define RT_REMOVE_CHILD_256
Definition: radixtree.h:235
#define RT_NODE_4
Definition: radixtree.h:252
#define RT_NODE_256
Definition: radixtree.h:255
#define RT_SIZE_CLASS
Definition: radixtree.h:256
#define RT_RADIX_TREE_CONTROL
Definition: radixtree.h:244
#define RT_NODE_MUST_GROW(node)
Definition: radixtree.h:1132
#define RT_COPY_COMMON
Definition: radixtree.h:207
#define RT_FREE
Definition: radixtree.h:174
#define RT_NODE_48
Definition: radixtree.h:254
#define RT_SLAB_BLOCK_SIZE(size)
Definition: radixtree.h:368
#define RT_FREE_NODE
Definition: radixtree.h:202
#define RT_CLASS_16_LO
Definition: radixtree.h:260
#define RT_MAX_SHIFT
Definition: radixtree.h:324
#define RT_SHIFT_GET_MAX_VAL
Definition: radixtree.h:221
#define RT_MEMORY_USAGE
Definition: radixtree.h:191
#define RT_NODE_48_IS_CHUNK_USED
Definition: radixtree.h:216
#define RT_BM_BIT(x)
Definition: radixtree.h:334
#define RT_CHUNK_MASK
Definition: radixtree.h:321
#define RT_FANOUT_256
Definition: radixtree.h:504
#define RT_COPY_ARRAYS_AND_DELETE
Definition: radixtree.h:215
#define RT_CLASS_256
Definition: radixtree.h:263
#define RT_ADD_CHILD_4
Definition: radixtree.h:225
#define RT_REMOVE_CHILD_48
Definition: radixtree.h:234
#define RT_DUMP_NODE
Definition: radixtree.h:192
#define RT_ADD_CHILD_256
Definition: radixtree.h:228
#define RT_KEY_GET_SHIFT
Definition: radixtree.h:220
#define RT_SHIFT_ARRAYS_AND_DELETE
Definition: radixtree.h:213
#define RT_NODE_DELETE
Definition: radixtree.h:223
RT_SIZE_CLASS
Definition: radixtree.h:631
#define RT_NODE_16
Definition: radixtree.h:253
#define RT_NODE_INSERT
Definition: radixtree.h:224
tree ctl root
Definition: radixtree.h:1886
#define RT_MAX_LEVEL
Definition: radixtree.h:327
#define RT_GROW_NODE_16
Definition: radixtree.h:230
#define RT_CLASS_16_HI
Definition: radixtree.h:261
#define RT_NODE_256_GET_CHILD
Definition: radixtree.h:219
StaticAssertDecl(RT_FANOUT_4<=RT_FANOUT_4_MAX, "watch struct padding")
#define RT_SHRINK_NODE_16
Definition: radixtree.h:236
#define RT_NODE_KIND_4
Definition: radixtree.h:358
#define RT_PTR_ALLOC_IS_VALID(ptr)
Definition: radixtree.h:404
#define RT_GET_KEY_CHUNK(key, shift)
Definition: radixtree.h:330
#define RT_VERIFY_NODE
Definition: radixtree.h:240
#define RT_FANOUT_16_HI
Definition: radixtree.h:600
#define RT_GET_VALUE_SIZE(v)
Definition: radixtree.h:431
#define RT_GROW_NODE_48
Definition: radixtree.h:231
#define RT_PTR_ALLOC
Definition: radixtree.h:402
#define RT_NODE_256_IS_CHUNK_USED
Definition: radixtree.h:218
#define RT_FANOUT_16_MAX
Definition: radixtree.h:492
#define RT_SHIFT_ARRAYS_FOR_INSERT
Definition: radixtree.h:212
#define RT_FANOUT_48
Definition: radixtree.h:601
#define RT_NODE_16_SEARCH_EQ
Definition: radixtree.h:209
#define RT_BM_IDX(x)
Definition: radixtree.h:333
#define RT_SIZE_CLASS_INFO
Definition: radixtree.h:258
#define RT_EXTEND_UP
Definition: radixtree.h:205
#define RT_FANOUT_48_MAX
Definition: radixtree.h:502
#define RT_SET
Definition: radixtree.h:184
#define RT_FREE_LEAF
Definition: radixtree.h:203
#define RT_FANOUT_4_MAX
Definition: radixtree.h:489
#define RT_FANOUT_4
Definition: radixtree.h:611
#define RT_CLASS_48
Definition: radixtree.h:262
#define RT_FANOUT_16_LO
Definition: radixtree.h:598
#define RT_INVALID_PTR_ALLOC
Definition: radixtree.h:403
#define RT_REMOVE_CHILD_4
Definition: radixtree.h:232
#define RT_NUM_SIZE_CLASSES
Definition: radixtree.h:676
#define RT_CLASS_4
Definition: radixtree.h:259
static Vector8 vector8_broadcast(const uint8 c)
Definition: simd.h:135
static void vector8_load(Vector8 *v, const uint8 *s)
Definition: simd.h:108
uint64 Vector8
Definition: simd.h:60
MemoryContext SlabContextCreate(MemoryContext parent, const char *name, Size blockSize, Size chunkSize)
Definition: slab.c:322
Definition: lwlock.h:42
RT_NODE_ITER node_iters[RT_MAX_LEVEL]
Definition: radixtree.h:752
int cur_level
Definition: radixtree.h:754
RT_RADIX_TREE * tree
Definition: radixtree.h:746
uint64 key
Definition: radixtree.h:757
int top_level
Definition: radixtree.h:753
RT_NODE base
Definition: radixtree.h:528
RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER]
Definition: radixtree.h:533
uint8 chunks[RT_FANOUT_16_MAX]
Definition: radixtree.h:530
RT_PTR_ALLOC children[RT_FANOUT_256]
Definition: radixtree.h:574
RT_NODE base
Definition: radixtree.h:568
bitmapword isset[RT_BM_IDX(RT_FANOUT_256)]
Definition: radixtree.h:571
RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER]
Definition: radixtree.h:558
uint8 slot_idxs[RT_NODE_MAX_SLOTS]
Definition: radixtree.h:552
RT_NODE base
Definition: radixtree.h:542
bitmapword isset[RT_BM_IDX(RT_FANOUT_48_MAX)]
Definition: radixtree.h:545
RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER]
Definition: radixtree.h:523
RT_NODE base
Definition: radixtree.h:518
uint8 chunks[RT_FANOUT_4_MAX]
Definition: radixtree.h:520
RT_CHILD_PTR node
Definition: radixtree.h:733
uint8 kind
Definition: radixtree.h:375
uint8 count
Definition: radixtree.h:392
uint8 fanout
Definition: radixtree.h:384
RT_PTR_ALLOC root
Definition: radixtree.h:692
MemoryContext context
Definition: radixtree.h:707
MemoryContextData * node_slabs[RT_NUM_SIZE_CLASSES]
Definition: radixtree.h:715
MemoryContextData * leaf_context
Definition: radixtree.h:718
MemoryContextData * iter_context
Definition: radixtree.h:720
RT_RADIX_TREE_CONTROL * ctl
Definition: radixtree.h:710
const char * name
Definition: radixtree.h:642
Definition: dsa.c:348
Definition: type.h:95
#define RT_VALUE_TYPE
Definition: tidstore.c:106
#define RT_SCOPE
Definition: tidstore.c:103
#define RT_PREFIX
Definition: tidstore.c:101
RT_NODE * local
Definition: radixtree.h:419
RT_PTR_ALLOC alloc
Definition: radixtree.h:418
Datum bit(PG_FUNCTION_ARGS)
Definition: varbit.c:391