PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
dshash.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * dshash.c
4 * Concurrent hash tables backed by dynamic shared memory areas.
5 *
6 * This is an open hashing hash table, with a linked list at each table
7 * entry. It supports dynamic resizing, as required to prevent the linked
8 * lists from growing too long on average. Currently, only growing is
9 * supported: the hash table never becomes smaller.
10 *
11 * To deal with concurrency, it has a fixed size set of partitions, each of
12 * which is independently locked. Each bucket maps to a partition; so insert,
13 * find and iterate operations normally only acquire one lock. Therefore,
14 * good concurrency is achieved whenever such operations don't collide at the
15 * lock partition level. However, when a resize operation begins, all
16 * partition locks must be acquired simultaneously for a brief period. This
17 * is only expected to happen a small number of times until a stable size is
18 * found, since growth is geometric.
19 *
20 * Future versions may support iterators and incremental resizing; for now
21 * the implementation is minimalist.
22 *
23 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
25 *
26 * IDENTIFICATION
27 * src/backend/lib/dshash.c
28 *
29 *-------------------------------------------------------------------------
30 */
31
32#include "postgres.h"
33
34#include "common/hashfn.h"
35#include "lib/dshash.h"
36#include "storage/lwlock.h"
37#include "utils/dsa.h"
38
39/*
40 * An item in the hash table. This wraps the user's entry object in an
41 * envelop that holds a pointer back to the bucket and a pointer to the next
42 * item in the bucket.
43 */
45{
46 /* The next item in the same bucket. */
48 /* The hashed key, to avoid having to recompute it. */
50 /* The user's entry object follows here. See ENTRY_FROM_ITEM(item). */
51};
52
53/*
54 * The number of partitions for locking purposes. This is set to match
55 * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
56 * the buffer pool must be good enough for any other purpose. This could
57 * become a runtime parameter in future.
58 */
59#define DSHASH_NUM_PARTITIONS_LOG2 7
60#define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
61
62/* A magic value used to identify our hash tables. */
63#define DSHASH_MAGIC 0x75ff6a20
64
65/*
66 * Tracking information for each lock partition. Initially, each partition
67 * corresponds to one bucket, but each time the hash table grows, the buckets
68 * covered by each partition split so the number of buckets covered doubles.
69 *
70 * We might want to add padding here so that each partition is on a different
71 * cache line, but doing so would bloat this structure considerably.
72 */
73typedef struct dshash_partition
74{
75 LWLock lock; /* Protects all buckets in this partition. */
76 size_t count; /* # of items in this partition's buckets */
78
79/*
80 * The head object for a hash table. This will be stored in dynamic shared
81 * memory.
82 */
84{
89
90 /*
91 * The following members are written to only when ALL partitions locks are
92 * held. They can be read when any one partition lock is held.
93 */
94
95 /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
96 size_t size_log2; /* log2(number of buckets) */
97 dsa_pointer buckets; /* current bucket array */
99
100/*
101 * Per-backend state for a dynamic hash table.
102 */
104{
105 dsa_area *area; /* Backing dynamic shared memory area. */
106 dshash_parameters params; /* Parameters. */
107 void *arg; /* User-supplied data pointer. */
108 dshash_table_control *control; /* Control object in DSM. */
109 dsa_pointer *buckets; /* Current bucket pointers in DSM. */
110 size_t size_log2; /* log2(number of buckets) */
111};
112
113/* Given a pointer to an item, find the entry (user data) it holds. */
114#define ENTRY_FROM_ITEM(item) \
115 ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
116
117/* Given a pointer to an entry, find the item that holds it. */
118#define ITEM_FROM_ENTRY(entry) \
119 ((dshash_table_item *)((char *)(entry) - \
120 MAXALIGN(sizeof(dshash_table_item))))
121
122/* How many resize operations (bucket splits) have there been? */
123#define NUM_SPLITS(size_log2) \
124 (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
125
126/* How many buckets are there in a given size? */
127#define NUM_BUCKETS(size_log2) \
128 (((size_t) 1) << (size_log2))
129
130/* How many buckets are there in each partition at a given size? */
131#define BUCKETS_PER_PARTITION(size_log2) \
132 (((size_t) 1) << NUM_SPLITS(size_log2))
133
134/* Max entries before we need to grow. Half + quarter = 75% load factor. */
135#define MAX_COUNT_PER_PARTITION(hash_table) \
136 (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
137 BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
138
139/* Choose partition based on the highest order bits of the hash. */
140#define PARTITION_FOR_HASH(hash) \
141 (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
142
143/*
144 * Find the bucket index for a given hash and table size. Each time the table
145 * doubles in size, the appropriate bucket for a given hash value doubles and
146 * possibly adds one, depending on the newly revealed bit, so that all buckets
147 * are split.
148 */
149#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2) \
150 (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
151
152/* The index of the first bucket in a given partition. */
153#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
154 ((partition) << NUM_SPLITS(size_log2))
155
156/* Choose partition based on bucket index. */
157#define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2) \
158 ((bucket_idx) >> NUM_SPLITS(size_log2))
159
160/* The head of the active bucket for a given hash value (lvalue). */
161#define BUCKET_FOR_HASH(hash_table, hash) \
162 (hash_table->buckets[ \
163 BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, \
164 hash_table->size_log2)])
165
166static void delete_item(dshash_table *hash_table,
167 dshash_table_item *item);
168static void resize(dshash_table *hash_table, size_t new_size_log2);
169static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
170static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
171 const void *key,
172 dsa_pointer item_pointer);
173static void insert_item_into_bucket(dshash_table *hash_table,
174 dsa_pointer item_pointer,
175 dshash_table_item *item,
176 dsa_pointer *bucket);
178 const void *key,
179 dsa_pointer *bucket);
180static bool delete_key_from_bucket(dshash_table *hash_table,
181 const void *key,
182 dsa_pointer *bucket_head);
183static bool delete_item_from_bucket(dshash_table *hash_table,
184 dshash_table_item *item,
185 dsa_pointer *bucket_head);
186static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
187static inline bool equal_keys(dshash_table *hash_table,
188 const void *a, const void *b);
189static inline void copy_key(dshash_table *hash_table, void *dest,
190 const void *src);
191
192#define PARTITION_LOCK(hash_table, i) \
193 (&(hash_table)->control->partitions[(i)].lock)
194
195#define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
196 Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
197 DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
198
199/*
200 * Create a new hash table backed by the given dynamic shared area, with the
201 * given parameters. The returned object is allocated in backend-local memory
202 * using the current MemoryContext. 'arg' will be passed through to the
203 * compare, hash, and copy functions.
204 */
206dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
207{
208 dshash_table *hash_table;
209 dsa_pointer control;
210
211 /* Allocate the backend-local object representing the hash table. */
212 hash_table = palloc(sizeof(dshash_table));
213
214 /* Allocate the control object in shared memory. */
215 control = dsa_allocate(area, sizeof(dshash_table_control));
216
217 /* Set up the local and shared hash table structs. */
218 hash_table->area = area;
219 hash_table->params = *params;
220 hash_table->arg = arg;
221 hash_table->control = dsa_get_address(area, control);
222 hash_table->control->handle = control;
223 hash_table->control->magic = DSHASH_MAGIC;
224 hash_table->control->lwlock_tranche_id = params->tranche_id;
225
226 /* Set up the array of lock partitions. */
227 {
229 int tranche_id = hash_table->control->lwlock_tranche_id;
230 int i;
231
232 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
233 {
234 LWLockInitialize(&partitions[i].lock, tranche_id);
235 partitions[i].count = 0;
236 }
237 }
238
239 /*
240 * Set up the initial array of buckets. Our initial size is the same as
241 * the number of partitions.
242 */
244 hash_table->control->buckets =
248 if (!DsaPointerIsValid(hash_table->control->buckets))
249 {
250 dsa_free(area, control);
252 (errcode(ERRCODE_OUT_OF_MEMORY),
253 errmsg("out of memory"),
254 errdetail("Failed on DSA request of size %zu.",
256 }
257 hash_table->buckets = dsa_get_address(area,
258 hash_table->control->buckets);
259 hash_table->size_log2 = hash_table->control->size_log2;
260
261 return hash_table;
262}
263
264/*
265 * Attach to an existing hash table using a handle. The returned object is
266 * allocated in backend-local memory using the current MemoryContext. 'arg'
267 * will be passed through to the compare and hash functions.
268 */
271 dshash_table_handle handle, void *arg)
272{
273 dshash_table *hash_table;
274 dsa_pointer control;
275
276 /* Allocate the backend-local object representing the hash table. */
277 hash_table = palloc(sizeof(dshash_table));
278
279 /* Find the control object in shared memory. */
280 control = handle;
281
282 /* Set up the local hash table struct. */
283 hash_table->area = area;
284 hash_table->params = *params;
285 hash_table->arg = arg;
286 hash_table->control = dsa_get_address(area, control);
287 Assert(hash_table->control->magic == DSHASH_MAGIC);
288
289 /*
290 * These will later be set to the correct values by
291 * ensure_valid_bucket_pointers(), at which time we'll be holding a
292 * partition lock for interlocking against concurrent resizing.
293 */
294 hash_table->buckets = NULL;
295 hash_table->size_log2 = 0;
296
297 return hash_table;
298}
299
300/*
301 * Detach from a hash table. This frees backend-local resources associated
302 * with the hash table, but the hash table will continue to exist until it is
303 * either explicitly destroyed (by a backend that is still attached to it), or
304 * the area that backs it is returned to the operating system.
305 */
306void
308{
310
311 /* The hash table may have been destroyed. Just free local memory. */
312 pfree(hash_table);
313}
314
315/*
316 * Destroy a hash table, returning all memory to the area. The caller must be
317 * certain that no other backend will attempt to access the hash table before
318 * calling this function. Other backend must explicitly call dshash_detach to
319 * free up backend-local memory associated with the hash table. The backend
320 * that calls dshash_destroy must not call dshash_detach.
321 */
322void
324{
325 size_t size;
326 size_t i;
327
328 Assert(hash_table->control->magic == DSHASH_MAGIC);
330
331 /* Free all the entries. */
332 size = NUM_BUCKETS(hash_table->size_log2);
333 for (i = 0; i < size; ++i)
334 {
335 dsa_pointer item_pointer = hash_table->buckets[i];
336
337 while (DsaPointerIsValid(item_pointer))
338 {
339 dshash_table_item *item;
340 dsa_pointer next_item_pointer;
341
342 item = dsa_get_address(hash_table->area, item_pointer);
343 next_item_pointer = item->next;
344 dsa_free(hash_table->area, item_pointer);
345 item_pointer = next_item_pointer;
346 }
347 }
348
349 /*
350 * Vandalize the control block to help catch programming errors where
351 * other backends access the memory formerly occupied by this hash table.
352 */
353 hash_table->control->magic = 0;
354
355 /* Free the active table and control object. */
356 dsa_free(hash_table->area, hash_table->control->buckets);
357 dsa_free(hash_table->area, hash_table->control->handle);
358
359 pfree(hash_table);
360}
361
362/*
363 * Get a handle that can be used by other processes to attach to this hash
364 * table.
365 */
368{
369 Assert(hash_table->control->magic == DSHASH_MAGIC);
370
371 return hash_table->control->handle;
372}
373
374/*
375 * Look up an entry, given a key. Returns a pointer to an entry if one can be
376 * found with the given key. Returns NULL if the key is not found. If a
377 * non-NULL value is returned, the entry is locked and must be released by
378 * calling dshash_release_lock. If an error is raised before
379 * dshash_release_lock is called, the lock will be released automatically, but
380 * the caller must take care to ensure that the entry is not left corrupted.
381 * The lock mode is either shared or exclusive depending on 'exclusive'.
382 *
383 * The caller must not hold a lock already.
384 *
385 * Note that the lock held is in fact an LWLock, so interrupts will be held on
386 * return from this function, and not resumed until dshash_release_lock is
387 * called. It is a very good idea for the caller to release the lock quickly.
388 */
389void *
390dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
391{
393 size_t partition;
394 dshash_table_item *item;
395
396 hash = hash_key(hash_table, key);
397 partition = PARTITION_FOR_HASH(hash);
398
399 Assert(hash_table->control->magic == DSHASH_MAGIC);
401
402 LWLockAcquire(PARTITION_LOCK(hash_table, partition),
403 exclusive ? LW_EXCLUSIVE : LW_SHARED);
405
406 /* Search the active bucket. */
407 item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
408
409 if (!item)
410 {
411 /* Not found. */
412 LWLockRelease(PARTITION_LOCK(hash_table, partition));
413 return NULL;
414 }
415 else
416 {
417 /* The caller will free the lock by calling dshash_release_lock. */
418 return ENTRY_FROM_ITEM(item);
419 }
420}
421
422/*
423 * Returns a pointer to an exclusively locked item which must be released with
424 * dshash_release_lock. If the key is found in the hash table, 'found' is set
425 * to true and a pointer to the existing entry is returned. If the key is not
426 * found, 'found' is set to false, and a pointer to a newly created entry is
427 * returned.
428 *
429 * Notes above dshash_find() regarding locking and error handling equally
430 * apply here.
431 */
432void *
434 const void *key,
435 bool *found)
436{
438 size_t partition_index;
439 dshash_partition *partition;
440 dshash_table_item *item;
441
442 hash = hash_key(hash_table, key);
443 partition_index = PARTITION_FOR_HASH(hash);
444 partition = &hash_table->control->partitions[partition_index];
445
446 Assert(hash_table->control->magic == DSHASH_MAGIC);
448
449restart:
450 LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
453
454 /* Search the active bucket. */
455 item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
456
457 if (item)
458 *found = true;
459 else
460 {
461 *found = false;
462
463 /* Check if we are getting too full. */
464 if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
465 {
466 /*
467 * The load factor (= keys / buckets) for all buckets protected by
468 * this partition is > 0.75. Presumably the same applies
469 * generally across the whole hash table (though we don't attempt
470 * to track that directly to avoid contention on some kind of
471 * central counter; we just assume that this partition is
472 * representative). This is a good time to resize.
473 *
474 * Give up our existing lock first, because resizing needs to
475 * reacquire all the locks in the right order to avoid deadlocks.
476 */
477 LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
478 resize(hash_table, hash_table->size_log2 + 1);
479
480 goto restart;
481 }
482
483 /* Finally we can try to insert the new item. */
484 item = insert_into_bucket(hash_table, key,
485 &BUCKET_FOR_HASH(hash_table, hash));
486 item->hash = hash;
487 /* Adjust per-lock-partition counter for load factor knowledge. */
488 ++partition->count;
489 }
490
491 /* The caller must release the lock with dshash_release_lock. */
492 return ENTRY_FROM_ITEM(item);
493}
494
495/*
496 * Remove an entry by key. Returns true if the key was found and the
497 * corresponding entry was removed.
498 *
499 * To delete an entry that you already have a pointer to, see
500 * dshash_delete_entry.
501 */
502bool
503dshash_delete_key(dshash_table *hash_table, const void *key)
504{
506 size_t partition;
507 bool found;
508
509 Assert(hash_table->control->magic == DSHASH_MAGIC);
511
512 hash = hash_key(hash_table, key);
513 partition = PARTITION_FOR_HASH(hash);
514
515 LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
517
518 if (delete_key_from_bucket(hash_table, key,
519 &BUCKET_FOR_HASH(hash_table, hash)))
520 {
521 Assert(hash_table->control->partitions[partition].count > 0);
522 found = true;
523 --hash_table->control->partitions[partition].count;
524 }
525 else
526 found = false;
527
528 LWLockRelease(PARTITION_LOCK(hash_table, partition));
529
530 return found;
531}
532
533/*
534 * Remove an entry. The entry must already be exclusively locked, and must
535 * have been obtained by dshash_find or dshash_find_or_insert. Note that this
536 * function releases the lock just like dshash_release_lock.
537 *
538 * To delete an entry by key, see dshash_delete_key.
539 */
540void
541dshash_delete_entry(dshash_table *hash_table, void *entry)
542{
543 dshash_table_item *item = ITEM_FROM_ENTRY(entry);
544 size_t partition = PARTITION_FOR_HASH(item->hash);
545
546 Assert(hash_table->control->magic == DSHASH_MAGIC);
547 Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
548 LW_EXCLUSIVE));
549
550 delete_item(hash_table, item);
551 LWLockRelease(PARTITION_LOCK(hash_table, partition));
552}
553
554/*
555 * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
556 */
557void
558dshash_release_lock(dshash_table *hash_table, void *entry)
559{
560 dshash_table_item *item = ITEM_FROM_ENTRY(entry);
561 size_t partition_index = PARTITION_FOR_HASH(item->hash);
562
563 Assert(hash_table->control->magic == DSHASH_MAGIC);
564
565 LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
566}
567
568/*
569 * A compare function that forwards to memcmp.
570 */
571int
572dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
573{
574 return memcmp(a, b, size);
575}
576
577/*
578 * A hash function that forwards to tag_hash.
579 */
581dshash_memhash(const void *v, size_t size, void *arg)
582{
583 return tag_hash(v, size);
584}
585
586/*
587 * A copy function that forwards to memcpy.
588 */
589void
590dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
591{
592 (void) memcpy(dest, src, size);
593}
594
595/*
596 * A compare function that forwards to strcmp.
597 */
598int
599dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
600{
601 Assert(strlen((const char *) a) < size);
602 Assert(strlen((const char *) b) < size);
603
604 return strcmp((const char *) a, (const char *) b);
605}
606
607/*
608 * A hash function that forwards to string_hash.
609 */
611dshash_strhash(const void *v, size_t size, void *arg)
612{
613 Assert(strlen((const char *) v) < size);
614
615 return string_hash((const char *) v, size);
616}
617
618/*
619 * A copy function that forwards to strcpy.
620 */
621void
622dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
623{
624 Assert(strlen((const char *) src) < size);
625
626 (void) strcpy((char *) dest, (const char *) src);
627}
628
629/*
630 * Sequentially scan through dshash table and return all the elements one by
631 * one, return NULL when all elements have been returned.
632 *
633 * dshash_seq_term needs to be called when a scan finished. The caller may
634 * delete returned elements midst of a scan by using dshash_delete_current()
635 * if exclusive = true.
636 */
637void
639 bool exclusive)
640{
641 status->hash_table = hash_table;
642 status->curbucket = 0;
643 status->nbuckets = 0;
644 status->curitem = NULL;
646 status->curpartition = -1;
647 status->exclusive = exclusive;
648}
649
650/*
651 * Returns the next element.
652 *
653 * Returned elements are locked and the caller may not release the lock. It is
654 * released by future calls to dshash_seq_next() or dshash_seq_term().
655 */
656void *
658{
659 dsa_pointer next_item_pointer;
660
661 /*
662 * Not yet holding any partition locks. Need to determine the size of the
663 * hash table, it could have been resized since we were looking last.
664 * Since we iterate in partition order, we can start by unconditionally
665 * lock partition 0.
666 *
667 * Once we hold the lock, no resizing can happen until the scan ends. So
668 * we don't need to repeatedly call ensure_valid_bucket_pointers().
669 */
670 if (status->curpartition == -1)
671 {
672 Assert(status->curbucket == 0);
674
675 status->curpartition = 0;
676
678 status->curpartition),
679 status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
680
682
683 status->nbuckets =
685 next_item_pointer = status->hash_table->buckets[status->curbucket];
686 }
687 else
688 next_item_pointer = status->pnextitem;
689
691 status->curpartition),
692 status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
693
694 /* Move to the next bucket if we finished the current bucket */
695 while (!DsaPointerIsValid(next_item_pointer))
696 {
697 int next_partition;
698
699 if (++status->curbucket >= status->nbuckets)
700 {
701 /* all buckets have been scanned. finish. */
702 return NULL;
703 }
704
705 /* Check if move to the next partition */
706 next_partition =
708 status->hash_table->size_log2);
709
710 if (status->curpartition != next_partition)
711 {
712 /*
713 * Move to the next partition. Lock the next partition then
714 * release the current, not in the reverse order to avoid
715 * concurrent resizing. Avoid dead lock by taking lock in the
716 * same order with resize().
717 */
719 next_partition),
720 status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
722 status->curpartition));
723 status->curpartition = next_partition;
724 }
725
726 next_item_pointer = status->hash_table->buckets[status->curbucket];
727 }
728
729 status->curitem =
730 dsa_get_address(status->hash_table->area, next_item_pointer);
731
732 /*
733 * The caller may delete the item. Store the next item in case of
734 * deletion.
735 */
736 status->pnextitem = status->curitem->next;
737
738 return ENTRY_FROM_ITEM(status->curitem);
739}
740
741/*
742 * Terminates the seqscan and release all locks.
743 *
744 * Needs to be called after finishing or when exiting a seqscan.
745 */
746void
748{
749 if (status->curpartition >= 0)
751}
752
753/*
754 * Remove the current entry of the seq scan.
755 */
756void
758{
759 dshash_table *hash_table = status->hash_table;
760 dshash_table_item *item = status->curitem;
761 size_t partition PG_USED_FOR_ASSERTS_ONLY;
762
763 partition = PARTITION_FOR_HASH(item->hash);
764
765 Assert(status->exclusive);
766 Assert(hash_table->control->magic == DSHASH_MAGIC);
767 Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
768 LW_EXCLUSIVE));
769
770 delete_item(hash_table, item);
771}
772
773/*
774 * Print debugging information about the internal state of the hash table to
775 * stderr. The caller must hold no partition locks.
776 */
777void
779{
780 size_t i;
781 size_t j;
782
783 Assert(hash_table->control->magic == DSHASH_MAGIC);
785
786 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
787 {
788 Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
790 }
791
793
794 fprintf(stderr,
795 "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
796 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
797 {
798 dshash_partition *partition = &hash_table->control->partitions[i];
799 size_t begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
800 size_t end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
801
802 fprintf(stderr, " partition %zu\n", i);
803 fprintf(stderr,
804 " active buckets (key count = %zu)\n", partition->count);
805
806 for (j = begin; j < end; ++j)
807 {
808 size_t count = 0;
809 dsa_pointer bucket = hash_table->buckets[j];
810
811 while (DsaPointerIsValid(bucket))
812 {
813 dshash_table_item *item;
814
815 item = dsa_get_address(hash_table->area, bucket);
816
817 bucket = item->next;
818 ++count;
819 }
820 fprintf(stderr, " bucket %zu (key count = %zu)\n", j, count);
821 }
822 }
823
824 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
825 LWLockRelease(PARTITION_LOCK(hash_table, i));
826}
827
828/*
829 * Delete a locked item to which we have a pointer.
830 */
831static void
833{
834 size_t hash = item->hash;
835 size_t partition = PARTITION_FOR_HASH(hash);
836
837 Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
838
839 if (delete_item_from_bucket(hash_table, item,
840 &BUCKET_FOR_HASH(hash_table, hash)))
841 {
842 Assert(hash_table->control->partitions[partition].count > 0);
843 --hash_table->control->partitions[partition].count;
844 }
845 else
846 {
847 Assert(false);
848 }
849}
850
851/*
852 * Grow the hash table if necessary to the requested number of buckets. The
853 * requested size must be double some previously observed size.
854 *
855 * Must be called without any partition lock held.
856 */
857static void
858resize(dshash_table *hash_table, size_t new_size_log2)
859{
860 dsa_pointer old_buckets;
861 dsa_pointer new_buckets_shared;
862 dsa_pointer *new_buckets;
863 size_t size;
864 size_t new_size = ((size_t) 1) << new_size_log2;
865 size_t i;
866
867 /*
868 * Acquire the locks for all lock partitions. This is expensive, but we
869 * shouldn't have to do it many times.
870 */
871 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
872 {
873 Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
874
876 if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
877 {
878 /*
879 * Another backend has already increased the size; we can avoid
880 * obtaining all the locks and return early.
881 */
882 LWLockRelease(PARTITION_LOCK(hash_table, 0));
883 return;
884 }
885 }
886
887 Assert(new_size_log2 == hash_table->control->size_log2 + 1);
888
889 /* Allocate the space for the new table. */
890 new_buckets_shared =
891 dsa_allocate_extended(hash_table->area,
892 sizeof(dsa_pointer) * new_size,
894 new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
895
896 /*
897 * We've allocated the new bucket array; all that remains to do now is to
898 * reinsert all items, which amounts to adjusting all the pointers.
899 */
900 size = ((size_t) 1) << hash_table->control->size_log2;
901 for (i = 0; i < size; ++i)
902 {
903 dsa_pointer item_pointer = hash_table->buckets[i];
904
905 while (DsaPointerIsValid(item_pointer))
906 {
907 dshash_table_item *item;
908 dsa_pointer next_item_pointer;
909
910 item = dsa_get_address(hash_table->area, item_pointer);
911 next_item_pointer = item->next;
912 insert_item_into_bucket(hash_table, item_pointer, item,
913 &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
914 new_size_log2)]);
915 item_pointer = next_item_pointer;
916 }
917 }
918
919 /* Swap the hash table into place and free the old one. */
920 old_buckets = hash_table->control->buckets;
921 hash_table->control->buckets = new_buckets_shared;
922 hash_table->control->size_log2 = new_size_log2;
923 hash_table->buckets = new_buckets;
924 dsa_free(hash_table->area, old_buckets);
925
926 /* Release all the locks. */
927 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
928 LWLockRelease(PARTITION_LOCK(hash_table, i));
929}
930
931/*
932 * Make sure that our backend-local bucket pointers are up to date. The
933 * caller must have locked one lock partition, which prevents resize() from
934 * running concurrently.
935 */
936static inline void
938{
939 if (hash_table->size_log2 != hash_table->control->size_log2)
940 {
941 hash_table->buckets = dsa_get_address(hash_table->area,
942 hash_table->control->buckets);
943 hash_table->size_log2 = hash_table->control->size_log2;
944 }
945}
946
947/*
948 * Scan a locked bucket for a match, using the provided compare function.
949 */
950static inline dshash_table_item *
951find_in_bucket(dshash_table *hash_table, const void *key,
952 dsa_pointer item_pointer)
953{
954 while (DsaPointerIsValid(item_pointer))
955 {
956 dshash_table_item *item;
957
958 item = dsa_get_address(hash_table->area, item_pointer);
959 if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
960 return item;
961 item_pointer = item->next;
962 }
963 return NULL;
964}
965
966/*
967 * Insert an already-allocated item into a bucket.
968 */
969static void
971 dsa_pointer item_pointer,
972 dshash_table_item *item,
973 dsa_pointer *bucket)
974{
975 Assert(item == dsa_get_address(hash_table->area, item_pointer));
976
977 item->next = *bucket;
978 *bucket = item_pointer;
979}
980
981/*
982 * Allocate space for an entry with the given key and insert it into the
983 * provided bucket.
984 */
985static dshash_table_item *
987 const void *key,
988 dsa_pointer *bucket)
989{
990 dsa_pointer item_pointer;
991 dshash_table_item *item;
992
993 item_pointer = dsa_allocate(hash_table->area,
994 hash_table->params.entry_size +
995 MAXALIGN(sizeof(dshash_table_item)));
996 item = dsa_get_address(hash_table->area, item_pointer);
997 copy_key(hash_table, ENTRY_FROM_ITEM(item), key);
998 insert_item_into_bucket(hash_table, item_pointer, item, bucket);
999 return item;
1000}
1001
1002/*
1003 * Search a bucket for a matching key and delete it.
1004 */
1005static bool
1007 const void *key,
1008 dsa_pointer *bucket_head)
1009{
1010 while (DsaPointerIsValid(*bucket_head))
1011 {
1012 dshash_table_item *item;
1013
1014 item = dsa_get_address(hash_table->area, *bucket_head);
1015
1016 if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
1017 {
1019
1020 next = item->next;
1021 dsa_free(hash_table->area, *bucket_head);
1022 *bucket_head = next;
1023
1024 return true;
1025 }
1026 bucket_head = &item->next;
1027 }
1028 return false;
1029}
1030
1031/*
1032 * Delete the specified item from the bucket.
1033 */
1034static bool
1036 dshash_table_item *item,
1037 dsa_pointer *bucket_head)
1038{
1039 while (DsaPointerIsValid(*bucket_head))
1040 {
1041 dshash_table_item *bucket_item;
1042
1043 bucket_item = dsa_get_address(hash_table->area, *bucket_head);
1044
1045 if (bucket_item == item)
1046 {
1048
1049 next = item->next;
1050 dsa_free(hash_table->area, *bucket_head);
1051 *bucket_head = next;
1052 return true;
1053 }
1054 bucket_head = &bucket_item->next;
1055 }
1056 return false;
1057}
1058
1059/*
1060 * Compute the hash value for a key.
1061 */
1062static inline dshash_hash
1063hash_key(dshash_table *hash_table, const void *key)
1064{
1065 return hash_table->params.hash_function(key,
1066 hash_table->params.key_size,
1067 hash_table->arg);
1068}
1069
1070/*
1071 * Check whether two keys compare equal.
1072 */
1073static inline bool
1074equal_keys(dshash_table *hash_table, const void *a, const void *b)
1075{
1076 return hash_table->params.compare_function(a, b,
1077 hash_table->params.key_size,
1078 hash_table->arg) == 0;
1079}
1080
1081/*
1082 * Copy a key.
1083 */
1084static inline void
1085copy_key(dshash_table *hash_table, void *dest, const void *src)
1086{
1087 hash_table->params.copy_function(dest, src,
1088 hash_table->params.key_size,
1089 hash_table->arg);
1090}
static int32 next
Definition: blutils.c:224
#define MAXALIGN(LEN)
Definition: c.h:782
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:224
uint32_t uint32
Definition: c.h:502
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
void * dsa_get_address(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:942
dsa_pointer dsa_allocate_extended(dsa_area *area, size_t size, int flags)
Definition: dsa.c:671
void dsa_free(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:826
uint64 dsa_pointer
Definition: dsa.h:62
#define dsa_allocate(area, size)
Definition: dsa.h:109
#define InvalidDsaPointer
Definition: dsa.h:78
#define DSA_ALLOC_NO_OOM
Definition: dsa.h:74
#define DsaPointerIsValid(x)
Definition: dsa.h:106
#define DSA_ALLOC_HUGE
Definition: dsa.h:73
#define DSA_ALLOC_ZERO
Definition: dsa.h:75
bool dshash_delete_key(dshash_table *hash_table, const void *key)
Definition: dshash.c:503
void dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
Definition: dshash.c:590
void dshash_delete_entry(dshash_table *hash_table, void *entry)
Definition: dshash.c:541
void dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
Definition: dshash.c:622
void dshash_destroy(dshash_table *hash_table)
Definition: dshash.c:323
void dshash_release_lock(dshash_table *hash_table, void *entry)
Definition: dshash.c:558
#define PARTITION_LOCK(hash_table, i)
Definition: dshash.c:192
void dshash_detach(dshash_table *hash_table)
Definition: dshash.c:307
void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table, bool exclusive)
Definition: dshash.c:638
void * dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
Definition: dshash.c:390
#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)
Definition: dshash.c:149
static bool delete_key_from_bucket(dshash_table *hash_table, const void *key, dsa_pointer *bucket_head)
Definition: dshash.c:1006
dshash_hash dshash_strhash(const void *v, size_t size, void *arg)
Definition: dshash.c:611
static dshash_hash hash_key(dshash_table *hash_table, const void *key)
Definition: dshash.c:1063
#define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table)
Definition: dshash.c:195
#define ITEM_FROM_ENTRY(entry)
Definition: dshash.c:118
dshash_table_handle dshash_get_hash_table_handle(dshash_table *hash_table)
Definition: dshash.c:367
void dshash_dump(dshash_table *hash_table)
Definition: dshash.c:778
static bool equal_keys(dshash_table *hash_table, const void *a, const void *b)
Definition: dshash.c:1074
dshash_table * dshash_attach(dsa_area *area, const dshash_parameters *params, dshash_table_handle handle, void *arg)
Definition: dshash.c:270
void dshash_seq_term(dshash_seq_status *status)
Definition: dshash.c:747
#define DSHASH_MAGIC
Definition: dshash.c:63
static void insert_item_into_bucket(dshash_table *hash_table, dsa_pointer item_pointer, dshash_table_item *item, dsa_pointer *bucket)
Definition: dshash.c:970
static bool delete_item_from_bucket(dshash_table *hash_table, dshash_table_item *item, dsa_pointer *bucket_head)
Definition: dshash.c:1035
#define DSHASH_NUM_PARTITIONS
Definition: dshash.c:60
int dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
Definition: dshash.c:599
#define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)
Definition: dshash.c:157
static dshash_table_item * insert_into_bucket(dshash_table *hash_table, const void *key, dsa_pointer *bucket)
Definition: dshash.c:986
void * dshash_find_or_insert(dshash_table *hash_table, const void *key, bool *found)
Definition: dshash.c:433
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)
Definition: dshash.c:153
#define MAX_COUNT_PER_PARTITION(hash_table)
Definition: dshash.c:135
#define ENTRY_FROM_ITEM(item)
Definition: dshash.c:114
#define DSHASH_NUM_PARTITIONS_LOG2
Definition: dshash.c:59
static void delete_item(dshash_table *hash_table, dshash_table_item *item)
Definition: dshash.c:832
dshash_hash dshash_memhash(const void *v, size_t size, void *arg)
Definition: dshash.c:581
#define NUM_BUCKETS(size_log2)
Definition: dshash.c:127
static void ensure_valid_bucket_pointers(dshash_table *hash_table)
Definition: dshash.c:937
static dshash_table_item * find_in_bucket(dshash_table *hash_table, const void *key, dsa_pointer item_pointer)
Definition: dshash.c:951
void * dshash_seq_next(dshash_seq_status *status)
Definition: dshash.c:657
dshash_table * dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
Definition: dshash.c:206
static void resize(dshash_table *hash_table, size_t new_size_log2)
Definition: dshash.c:858
static void copy_key(dshash_table *hash_table, void *dest, const void *src)
Definition: dshash.c:1085
int dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
Definition: dshash.c:572
struct dshash_table_control dshash_table_control
#define PARTITION_FOR_HASH(hash)
Definition: dshash.c:140
struct dshash_partition dshash_partition
#define BUCKET_FOR_HASH(hash_table, hash)
Definition: dshash.c:161
void dshash_delete_current(dshash_seq_status *status)
Definition: dshash.c:757
dsa_pointer dshash_table_handle
Definition: dshash.h:24
uint32 dshash_hash
Definition: dshash.h:30
int errdetail(const char *fmt,...)
Definition: elog.c:1204
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
uint32 tag_hash(const void *key, Size keysize)
Definition: hashfn.c:677
uint32 string_hash(const void *key, Size keysize)
Definition: hashfn.c:660
Assert(PointerIsAligned(start, uint64))
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int j
Definition: isn.c:78
int i
Definition: isn.c:77
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1985
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1182
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:2029
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1902
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:721
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
void pfree(void *pointer)
Definition: mcxt.c:2146
void * palloc(Size size)
Definition: mcxt.c:1939
void * arg
static int partitions
Definition: pgbench.c:224
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
Definition: lwlock.h:42
Definition: dsa.c:348
dshash_hash_function hash_function
Definition: dshash.h:59
size_t key_size
Definition: dshash.h:56
dshash_compare_function compare_function
Definition: dshash.h:58
dshash_copy_function copy_function
Definition: dshash.h:60
size_t entry_size
Definition: dshash.h:57
LWLock lock
Definition: dshash.c:75
size_t count
Definition: dshash.c:76
bool exclusive
Definition: dshash.h:80
dshash_table_item * curitem
Definition: dshash.h:77
dsa_pointer pnextitem
Definition: dshash.h:78
int curpartition
Definition: dshash.h:79
dshash_table * hash_table
Definition: dshash.h:74
size_t size_log2
Definition: dshash.c:96
dshash_partition partitions[DSHASH_NUM_PARTITIONS]
Definition: dshash.c:87
dshash_table_handle handle
Definition: dshash.c:85
dsa_pointer buckets
Definition: dshash.c:97
dshash_hash hash
Definition: dshash.c:49
dsa_pointer next
Definition: dshash.c:47
dshash_parameters params
Definition: dshash.c:106
dsa_pointer * buckets
Definition: dshash.c:109
dsa_area * area
Definition: dshash.c:105
void * arg
Definition: dshash.c:107
size_t size_log2
Definition: dshash.c:110
dshash_table_control * control
Definition: dshash.c:108