PostgreSQL Source Code  git master
dshash.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dshash.c
4  * Concurrent hash tables backed by dynamic shared memory areas.
5  *
6  * This is an open hashing hash table, with a linked list at each table
7  * entry. It supports dynamic resizing, as required to prevent the linked
8  * lists from growing too long on average. Currently, only growing is
9  * supported: the hash table never becomes smaller.
10  *
11  * To deal with concurrency, it has a fixed size set of partitions, each of
12  * which is independently locked. Each bucket maps to a partition; so insert,
13  * find and iterate operations normally only acquire one lock. Therefore,
14  * good concurrency is achieved whenever such operations don't collide at the
15  * lock partition level. However, when a resize operation begins, all
16  * partition locks must be acquired simultaneously for a brief period. This
17  * is only expected to happen a small number of times until a stable size is
18  * found, since growth is geometric.
19  *
20  * Future versions may support iterators and incremental resizing; for now
21  * the implementation is minimalist.
22  *
23  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
24  * Portions Copyright (c) 1994, Regents of the University of California
25  *
26  * IDENTIFICATION
27  * src/backend/lib/dshash.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include "common/hashfn.h"
35 #include "lib/dshash.h"
36 #include "storage/lwlock.h"
37 #include "utils/dsa.h"
38 
39 /*
40  * An item in the hash table. This wraps the user's entry object in an
41  * envelop that holds a pointer back to the bucket and a pointer to the next
42  * item in the bucket.
43  */
45 {
46  /* The next item in the same bucket. */
48  /* The hashed key, to avoid having to recompute it. */
50  /* The user's entry object follows here. See ENTRY_FROM_ITEM(item). */
51 };
52 
53 /*
54  * The number of partitions for locking purposes. This is set to match
55  * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
56  * the buffer pool must be good enough for any other purpose. This could
57  * become a runtime parameter in future.
58  */
59 #define DSHASH_NUM_PARTITIONS_LOG2 7
60 #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
61 
62 /* A magic value used to identify our hash tables. */
63 #define DSHASH_MAGIC 0x75ff6a20
64 
65 /*
66  * Tracking information for each lock partition. Initially, each partition
67  * corresponds to one bucket, but each time the hash table grows, the buckets
68  * covered by each partition split so the number of buckets covered doubles.
69  *
70  * We might want to add padding here so that each partition is on a different
71  * cache line, but doing so would bloat this structure considerably.
72  */
73 typedef struct dshash_partition
74 {
75  LWLock lock; /* Protects all buckets in this partition. */
76  size_t count; /* # of items in this partition's buckets */
78 
79 /*
80  * The head object for a hash table. This will be stored in dynamic shared
81  * memory.
82  */
83 typedef struct dshash_table_control
84 {
89 
90  /*
91  * The following members are written to only when ALL partitions locks are
92  * held. They can be read when any one partition lock is held.
93  */
94 
95  /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
96  size_t size_log2; /* log2(number of buckets) */
97  dsa_pointer buckets; /* current bucket array */
99 
100 /*
101  * Per-backend state for a dynamic hash table.
102  */
104 {
105  dsa_area *area; /* Backing dynamic shared memory area. */
106  dshash_parameters params; /* Parameters. */
107  void *arg; /* User-supplied data pointer. */
108  dshash_table_control *control; /* Control object in DSM. */
109  dsa_pointer *buckets; /* Current bucket pointers in DSM. */
110  size_t size_log2; /* log2(number of buckets) */
111 };
112 
113 /* Given a pointer to an item, find the entry (user data) it holds. */
114 #define ENTRY_FROM_ITEM(item) \
115  ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
116 
117 /* Given a pointer to an entry, find the item that holds it. */
118 #define ITEM_FROM_ENTRY(entry) \
119  ((dshash_table_item *)((char *)(entry) - \
120  MAXALIGN(sizeof(dshash_table_item))))
121 
122 /* How many resize operations (bucket splits) have there been? */
123 #define NUM_SPLITS(size_log2) \
124  (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
125 
126 /* How many buckets are there in a given size? */
127 #define NUM_BUCKETS(size_log2) \
128  (((size_t) 1) << (size_log2))
129 
130 /* How many buckets are there in each partition at a given size? */
131 #define BUCKETS_PER_PARTITION(size_log2) \
132  (((size_t) 1) << NUM_SPLITS(size_log2))
133 
134 /* Max entries before we need to grow. Half + quarter = 75% load factor. */
135 #define MAX_COUNT_PER_PARTITION(hash_table) \
136  (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
137  BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
138 
139 /* Choose partition based on the highest order bits of the hash. */
140 #define PARTITION_FOR_HASH(hash) \
141  (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
142 
143 /*
144  * Find the bucket index for a given hash and table size. Each time the table
145  * doubles in size, the appropriate bucket for a given hash value doubles and
146  * possibly adds one, depending on the newly revealed bit, so that all buckets
147  * are split.
148  */
149 #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2) \
150  (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
151 
152 /* The index of the first bucket in a given partition. */
153 #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
154  ((partition) << NUM_SPLITS(size_log2))
155 
156 /* Choose partition based on bucket index. */
157 #define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2) \
158  ((bucket_idx) >> NUM_SPLITS(size_log2))
159 
160 /* The head of the active bucket for a given hash value (lvalue). */
161 #define BUCKET_FOR_HASH(hash_table, hash) \
162  (hash_table->buckets[ \
163  BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, \
164  hash_table->size_log2)])
165 
166 static void delete_item(dshash_table *hash_table,
167  dshash_table_item *item);
168 static void resize(dshash_table *hash_table, size_t new_size_log2);
169 static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
170 static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
171  const void *key,
172  dsa_pointer item_pointer);
173 static void insert_item_into_bucket(dshash_table *hash_table,
174  dsa_pointer item_pointer,
175  dshash_table_item *item,
176  dsa_pointer *bucket);
178  const void *key,
179  dsa_pointer *bucket);
180 static bool delete_key_from_bucket(dshash_table *hash_table,
181  const void *key,
182  dsa_pointer *bucket_head);
183 static bool delete_item_from_bucket(dshash_table *hash_table,
184  dshash_table_item *item,
185  dsa_pointer *bucket_head);
186 static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
187 static inline bool equal_keys(dshash_table *hash_table,
188  const void *a, const void *b);
189 static inline void copy_key(dshash_table *hash_table, void *dest,
190  const void *src);
191 
192 #define PARTITION_LOCK(hash_table, i) \
193  (&(hash_table)->control->partitions[(i)].lock)
194 
195 #define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
196  Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
197  DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
198 
199 /*
200  * Create a new hash table backed by the given dynamic shared area, with the
201  * given parameters. The returned object is allocated in backend-local memory
202  * using the current MemoryContext. 'arg' will be passed through to the
203  * compare, hash, and copy functions.
204  */
205 dshash_table *
206 dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
207 {
208  dshash_table *hash_table;
209  dsa_pointer control;
210 
211  /* Allocate the backend-local object representing the hash table. */
212  hash_table = palloc(sizeof(dshash_table));
213 
214  /* Allocate the control object in shared memory. */
215  control = dsa_allocate(area, sizeof(dshash_table_control));
216 
217  /* Set up the local and shared hash table structs. */
218  hash_table->area = area;
219  hash_table->params = *params;
220  hash_table->arg = arg;
221  hash_table->control = dsa_get_address(area, control);
222  hash_table->control->handle = control;
223  hash_table->control->magic = DSHASH_MAGIC;
224  hash_table->control->lwlock_tranche_id = params->tranche_id;
225 
226  /* Set up the array of lock partitions. */
227  {
229  int tranche_id = hash_table->control->lwlock_tranche_id;
230  int i;
231 
232  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
233  {
234  LWLockInitialize(&partitions[i].lock, tranche_id);
235  partitions[i].count = 0;
236  }
237  }
238 
239  /*
240  * Set up the initial array of buckets. Our initial size is the same as
241  * the number of partitions.
242  */
244  hash_table->control->buckets =
248  if (!DsaPointerIsValid(hash_table->control->buckets))
249  {
250  dsa_free(area, control);
251  ereport(ERROR,
252  (errcode(ERRCODE_OUT_OF_MEMORY),
253  errmsg("out of memory"),
254  errdetail("Failed on DSA request of size %zu.",
255  sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
256  }
257  hash_table->buckets = dsa_get_address(area,
258  hash_table->control->buckets);
259  hash_table->size_log2 = hash_table->control->size_log2;
260 
261  return hash_table;
262 }
263 
264 /*
265  * Attach to an existing hash table using a handle. The returned object is
266  * allocated in backend-local memory using the current MemoryContext. 'arg'
267  * will be passed through to the compare and hash functions.
268  */
269 dshash_table *
271  dshash_table_handle handle, void *arg)
272 {
273  dshash_table *hash_table;
274  dsa_pointer control;
275 
276  /* Allocate the backend-local object representing the hash table. */
277  hash_table = palloc(sizeof(dshash_table));
278 
279  /* Find the control object in shared memory. */
280  control = handle;
281 
282  /* Set up the local hash table struct. */
283  hash_table->area = area;
284  hash_table->params = *params;
285  hash_table->arg = arg;
286  hash_table->control = dsa_get_address(area, control);
287  Assert(hash_table->control->magic == DSHASH_MAGIC);
288 
289  /*
290  * These will later be set to the correct values by
291  * ensure_valid_bucket_pointers(), at which time we'll be holding a
292  * partition lock for interlocking against concurrent resizing.
293  */
294  hash_table->buckets = NULL;
295  hash_table->size_log2 = 0;
296 
297  return hash_table;
298 }
299 
300 /*
301  * Detach from a hash table. This frees backend-local resources associated
302  * with the hash table, but the hash table will continue to exist until it is
303  * either explicitly destroyed (by a backend that is still attached to it), or
304  * the area that backs it is returned to the operating system.
305  */
306 void
308 {
310 
311  /* The hash table may have been destroyed. Just free local memory. */
312  pfree(hash_table);
313 }
314 
315 /*
316  * Destroy a hash table, returning all memory to the area. The caller must be
317  * certain that no other backend will attempt to access the hash table before
318  * calling this function. Other backend must explicitly call dshash_detach to
319  * free up backend-local memory associated with the hash table. The backend
320  * that calls dshash_destroy must not call dshash_detach.
321  */
322 void
324 {
325  size_t size;
326  size_t i;
327 
328  Assert(hash_table->control->magic == DSHASH_MAGIC);
329  ensure_valid_bucket_pointers(hash_table);
330 
331  /* Free all the entries. */
332  size = NUM_BUCKETS(hash_table->size_log2);
333  for (i = 0; i < size; ++i)
334  {
335  dsa_pointer item_pointer = hash_table->buckets[i];
336 
337  while (DsaPointerIsValid(item_pointer))
338  {
339  dshash_table_item *item;
340  dsa_pointer next_item_pointer;
341 
342  item = dsa_get_address(hash_table->area, item_pointer);
343  next_item_pointer = item->next;
344  dsa_free(hash_table->area, item_pointer);
345  item_pointer = next_item_pointer;
346  }
347  }
348 
349  /*
350  * Vandalize the control block to help catch programming errors where
351  * other backends access the memory formerly occupied by this hash table.
352  */
353  hash_table->control->magic = 0;
354 
355  /* Free the active table and control object. */
356  dsa_free(hash_table->area, hash_table->control->buckets);
357  dsa_free(hash_table->area, hash_table->control->handle);
358 
359  pfree(hash_table);
360 }
361 
362 /*
363  * Get a handle that can be used by other processes to attach to this hash
364  * table.
365  */
368 {
369  Assert(hash_table->control->magic == DSHASH_MAGIC);
370 
371  return hash_table->control->handle;
372 }
373 
374 /*
375  * Look up an entry, given a key. Returns a pointer to an entry if one can be
376  * found with the given key. Returns NULL if the key is not found. If a
377  * non-NULL value is returned, the entry is locked and must be released by
378  * calling dshash_release_lock. If an error is raised before
379  * dshash_release_lock is called, the lock will be released automatically, but
380  * the caller must take care to ensure that the entry is not left corrupted.
381  * The lock mode is either shared or exclusive depending on 'exclusive'.
382  *
383  * The caller must not hold a lock already.
384  *
385  * Note that the lock held is in fact an LWLock, so interrupts will be held on
386  * return from this function, and not resumed until dshash_release_lock is
387  * called. It is a very good idea for the caller to release the lock quickly.
388  */
389 void *
390 dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
391 {
393  size_t partition;
394  dshash_table_item *item;
395 
396  hash = hash_key(hash_table, key);
397  partition = PARTITION_FOR_HASH(hash);
398 
399  Assert(hash_table->control->magic == DSHASH_MAGIC);
401 
402  LWLockAcquire(PARTITION_LOCK(hash_table, partition),
403  exclusive ? LW_EXCLUSIVE : LW_SHARED);
404  ensure_valid_bucket_pointers(hash_table);
405 
406  /* Search the active bucket. */
407  item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
408 
409  if (!item)
410  {
411  /* Not found. */
412  LWLockRelease(PARTITION_LOCK(hash_table, partition));
413  return NULL;
414  }
415  else
416  {
417  /* The caller will free the lock by calling dshash_release_lock. */
418  return ENTRY_FROM_ITEM(item);
419  }
420 }
421 
422 /*
423  * Returns a pointer to an exclusively locked item which must be released with
424  * dshash_release_lock. If the key is found in the hash table, 'found' is set
425  * to true and a pointer to the existing entry is returned. If the key is not
426  * found, 'found' is set to false, and a pointer to a newly created entry is
427  * returned.
428  *
429  * Notes above dshash_find() regarding locking and error handling equally
430  * apply here.
431  */
432 void *
434  const void *key,
435  bool *found)
436 {
438  size_t partition_index;
439  dshash_partition *partition;
440  dshash_table_item *item;
441 
442  hash = hash_key(hash_table, key);
443  partition_index = PARTITION_FOR_HASH(hash);
444  partition = &hash_table->control->partitions[partition_index];
445 
446  Assert(hash_table->control->magic == DSHASH_MAGIC);
448 
449 restart:
450  LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
451  LW_EXCLUSIVE);
452  ensure_valid_bucket_pointers(hash_table);
453 
454  /* Search the active bucket. */
455  item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
456 
457  if (item)
458  *found = true;
459  else
460  {
461  *found = false;
462 
463  /* Check if we are getting too full. */
464  if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
465  {
466  /*
467  * The load factor (= keys / buckets) for all buckets protected by
468  * this partition is > 0.75. Presumably the same applies
469  * generally across the whole hash table (though we don't attempt
470  * to track that directly to avoid contention on some kind of
471  * central counter; we just assume that this partition is
472  * representative). This is a good time to resize.
473  *
474  * Give up our existing lock first, because resizing needs to
475  * reacquire all the locks in the right order to avoid deadlocks.
476  */
477  LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
478  resize(hash_table, hash_table->size_log2 + 1);
479 
480  goto restart;
481  }
482 
483  /* Finally we can try to insert the new item. */
484  item = insert_into_bucket(hash_table, key,
485  &BUCKET_FOR_HASH(hash_table, hash));
486  item->hash = hash;
487  /* Adjust per-lock-partition counter for load factor knowledge. */
488  ++partition->count;
489  }
490 
491  /* The caller must release the lock with dshash_release_lock. */
492  return ENTRY_FROM_ITEM(item);
493 }
494 
495 /*
496  * Remove an entry by key. Returns true if the key was found and the
497  * corresponding entry was removed.
498  *
499  * To delete an entry that you already have a pointer to, see
500  * dshash_delete_entry.
501  */
502 bool
503 dshash_delete_key(dshash_table *hash_table, const void *key)
504 {
506  size_t partition;
507  bool found;
508 
509  Assert(hash_table->control->magic == DSHASH_MAGIC);
511 
512  hash = hash_key(hash_table, key);
513  partition = PARTITION_FOR_HASH(hash);
514 
515  LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
516  ensure_valid_bucket_pointers(hash_table);
517 
518  if (delete_key_from_bucket(hash_table, key,
519  &BUCKET_FOR_HASH(hash_table, hash)))
520  {
521  Assert(hash_table->control->partitions[partition].count > 0);
522  found = true;
523  --hash_table->control->partitions[partition].count;
524  }
525  else
526  found = false;
527 
528  LWLockRelease(PARTITION_LOCK(hash_table, partition));
529 
530  return found;
531 }
532 
533 /*
534  * Remove an entry. The entry must already be exclusively locked, and must
535  * have been obtained by dshash_find or dshash_find_or_insert. Note that this
536  * function releases the lock just like dshash_release_lock.
537  *
538  * To delete an entry by key, see dshash_delete_key.
539  */
540 void
541 dshash_delete_entry(dshash_table *hash_table, void *entry)
542 {
543  dshash_table_item *item = ITEM_FROM_ENTRY(entry);
544  size_t partition = PARTITION_FOR_HASH(item->hash);
545 
546  Assert(hash_table->control->magic == DSHASH_MAGIC);
547  Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
548  LW_EXCLUSIVE));
549 
550  delete_item(hash_table, item);
551  LWLockRelease(PARTITION_LOCK(hash_table, partition));
552 }
553 
554 /*
555  * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
556  */
557 void
558 dshash_release_lock(dshash_table *hash_table, void *entry)
559 {
560  dshash_table_item *item = ITEM_FROM_ENTRY(entry);
561  size_t partition_index = PARTITION_FOR_HASH(item->hash);
562 
563  Assert(hash_table->control->magic == DSHASH_MAGIC);
564 
565  LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
566 }
567 
568 /*
569  * A compare function that forwards to memcmp.
570  */
571 int
572 dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
573 {
574  return memcmp(a, b, size);
575 }
576 
577 /*
578  * A hash function that forwards to tag_hash.
579  */
581 dshash_memhash(const void *v, size_t size, void *arg)
582 {
583  return tag_hash(v, size);
584 }
585 
586 /*
587  * A copy function that forwards to memcpy.
588  */
589 void
590 dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
591 {
592  (void) memcpy(dest, src, size);
593 }
594 
595 /*
596  * A compare function that forwards to strcmp.
597  */
598 int
599 dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
600 {
601  Assert(strlen((const char *) a) < size);
602  Assert(strlen((const char *) b) < size);
603 
604  return strcmp((const char *) a, (const char *) b);
605 }
606 
607 /*
608  * A hash function that forwards to string_hash.
609  */
611 dshash_strhash(const void *v, size_t size, void *arg)
612 {
613  Assert(strlen((const char *) v) < size);
614 
615  return string_hash((const char *) v, size);
616 }
617 
618 /*
619  * A copy function that forwards to strcpy.
620  */
621 void
622 dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
623 {
624  Assert(strlen((const char *) src) < size);
625 
626  (void) strcpy((char *) dest, (const char *) src);
627 }
628 
629 /*
630  * Sequentially scan through dshash table and return all the elements one by
631  * one, return NULL when all elements have been returned.
632  *
633  * dshash_seq_term needs to be called when a scan finished. The caller may
634  * delete returned elements midst of a scan by using dshash_delete_current()
635  * if exclusive = true.
636  */
637 void
639  bool exclusive)
640 {
641  status->hash_table = hash_table;
642  status->curbucket = 0;
643  status->nbuckets = 0;
644  status->curitem = NULL;
645  status->pnextitem = InvalidDsaPointer;
646  status->curpartition = -1;
647  status->exclusive = exclusive;
648 }
649 
650 /*
651  * Returns the next element.
652  *
653  * Returned elements are locked and the caller may not release the lock. It is
654  * released by future calls to dshash_seq_next() or dshash_seq_term().
655  */
656 void *
658 {
659  dsa_pointer next_item_pointer;
660 
661  /*
662  * Not yet holding any partition locks. Need to determine the size of the
663  * hash table, it could have been resized since we were looking last.
664  * Since we iterate in partition order, we can start by unconditionally
665  * lock partition 0.
666  *
667  * Once we hold the lock, no resizing can happen until the scan ends. So
668  * we don't need to repeatedly call ensure_valid_bucket_pointers().
669  */
670  if (status->curpartition == -1)
671  {
672  Assert(status->curbucket == 0);
674 
675  status->curpartition = 0;
676 
678  status->curpartition),
679  status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
680 
682 
683  status->nbuckets =
685  next_item_pointer = status->hash_table->buckets[status->curbucket];
686  }
687  else
688  next_item_pointer = status->pnextitem;
689 
691  status->curpartition),
692  status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
693 
694  /* Move to the next bucket if we finished the current bucket */
695  while (!DsaPointerIsValid(next_item_pointer))
696  {
697  int next_partition;
698 
699  if (++status->curbucket >= status->nbuckets)
700  {
701  /* all buckets have been scanned. finish. */
702  return NULL;
703  }
704 
705  /* Check if move to the next partition */
706  next_partition =
708  status->hash_table->size_log2);
709 
710  if (status->curpartition != next_partition)
711  {
712  /*
713  * Move to the next partition. Lock the next partition then
714  * release the current, not in the reverse order to avoid
715  * concurrent resizing. Avoid dead lock by taking lock in the
716  * same order with resize().
717  */
719  next_partition),
720  status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
722  status->curpartition));
723  status->curpartition = next_partition;
724  }
725 
726  next_item_pointer = status->hash_table->buckets[status->curbucket];
727  }
728 
729  status->curitem =
730  dsa_get_address(status->hash_table->area, next_item_pointer);
731 
732  /*
733  * The caller may delete the item. Store the next item in case of
734  * deletion.
735  */
736  status->pnextitem = status->curitem->next;
737 
738  return ENTRY_FROM_ITEM(status->curitem);
739 }
740 
741 /*
742  * Terminates the seqscan and release all locks.
743  *
744  * Needs to be called after finishing or when exiting a seqscan.
745  */
746 void
748 {
749  if (status->curpartition >= 0)
751 }
752 
753 /*
754  * Remove the current entry of the seq scan.
755  */
756 void
758 {
759  dshash_table *hash_table = status->hash_table;
760  dshash_table_item *item = status->curitem;
761  size_t partition PG_USED_FOR_ASSERTS_ONLY;
762 
763  partition = PARTITION_FOR_HASH(item->hash);
764 
765  Assert(status->exclusive);
766  Assert(hash_table->control->magic == DSHASH_MAGIC);
767  Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
768  LW_EXCLUSIVE));
769 
770  delete_item(hash_table, item);
771 }
772 
773 /*
774  * Print debugging information about the internal state of the hash table to
775  * stderr. The caller must hold no partition locks.
776  */
777 void
779 {
780  size_t i;
781  size_t j;
782 
783  Assert(hash_table->control->magic == DSHASH_MAGIC);
785 
786  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
787  {
788  Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
789  LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
790  }
791 
792  ensure_valid_bucket_pointers(hash_table);
793 
794  fprintf(stderr,
795  "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
796  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
797  {
798  dshash_partition *partition = &hash_table->control->partitions[i];
799  size_t begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
800  size_t end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
801 
802  fprintf(stderr, " partition %zu\n", i);
803  fprintf(stderr,
804  " active buckets (key count = %zu)\n", partition->count);
805 
806  for (j = begin; j < end; ++j)
807  {
808  size_t count = 0;
809  dsa_pointer bucket = hash_table->buckets[j];
810 
811  while (DsaPointerIsValid(bucket))
812  {
813  dshash_table_item *item;
814 
815  item = dsa_get_address(hash_table->area, bucket);
816 
817  bucket = item->next;
818  ++count;
819  }
820  fprintf(stderr, " bucket %zu (key count = %zu)\n", j, count);
821  }
822  }
823 
824  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
825  LWLockRelease(PARTITION_LOCK(hash_table, i));
826 }
827 
828 /*
829  * Delete a locked item to which we have a pointer.
830  */
831 static void
833 {
834  size_t hash = item->hash;
835  size_t partition = PARTITION_FOR_HASH(hash);
836 
837  Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
838 
839  if (delete_item_from_bucket(hash_table, item,
840  &BUCKET_FOR_HASH(hash_table, hash)))
841  {
842  Assert(hash_table->control->partitions[partition].count > 0);
843  --hash_table->control->partitions[partition].count;
844  }
845  else
846  {
847  Assert(false);
848  }
849 }
850 
851 /*
852  * Grow the hash table if necessary to the requested number of buckets. The
853  * requested size must be double some previously observed size.
854  *
855  * Must be called without any partition lock held.
856  */
857 static void
858 resize(dshash_table *hash_table, size_t new_size_log2)
859 {
860  dsa_pointer old_buckets;
861  dsa_pointer new_buckets_shared;
862  dsa_pointer *new_buckets;
863  size_t size;
864  size_t new_size = ((size_t) 1) << new_size_log2;
865  size_t i;
866 
867  /*
868  * Acquire the locks for all lock partitions. This is expensive, but we
869  * shouldn't have to do it many times.
870  */
871  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
872  {
873  Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
874 
876  if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
877  {
878  /*
879  * Another backend has already increased the size; we can avoid
880  * obtaining all the locks and return early.
881  */
882  LWLockRelease(PARTITION_LOCK(hash_table, 0));
883  return;
884  }
885  }
886 
887  Assert(new_size_log2 == hash_table->control->size_log2 + 1);
888 
889  /* Allocate the space for the new table. */
890  new_buckets_shared = dsa_allocate0(hash_table->area,
891  sizeof(dsa_pointer) * new_size);
892  new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
893 
894  /*
895  * We've allocated the new bucket array; all that remains to do now is to
896  * reinsert all items, which amounts to adjusting all the pointers.
897  */
898  size = ((size_t) 1) << hash_table->control->size_log2;
899  for (i = 0; i < size; ++i)
900  {
901  dsa_pointer item_pointer = hash_table->buckets[i];
902 
903  while (DsaPointerIsValid(item_pointer))
904  {
905  dshash_table_item *item;
906  dsa_pointer next_item_pointer;
907 
908  item = dsa_get_address(hash_table->area, item_pointer);
909  next_item_pointer = item->next;
910  insert_item_into_bucket(hash_table, item_pointer, item,
911  &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
912  new_size_log2)]);
913  item_pointer = next_item_pointer;
914  }
915  }
916 
917  /* Swap the hash table into place and free the old one. */
918  old_buckets = hash_table->control->buckets;
919  hash_table->control->buckets = new_buckets_shared;
920  hash_table->control->size_log2 = new_size_log2;
921  hash_table->buckets = new_buckets;
922  dsa_free(hash_table->area, old_buckets);
923 
924  /* Release all the locks. */
925  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
926  LWLockRelease(PARTITION_LOCK(hash_table, i));
927 }
928 
929 /*
930  * Make sure that our backend-local bucket pointers are up to date. The
931  * caller must have locked one lock partition, which prevents resize() from
932  * running concurrently.
933  */
934 static inline void
936 {
937  if (hash_table->size_log2 != hash_table->control->size_log2)
938  {
939  hash_table->buckets = dsa_get_address(hash_table->area,
940  hash_table->control->buckets);
941  hash_table->size_log2 = hash_table->control->size_log2;
942  }
943 }
944 
945 /*
946  * Scan a locked bucket for a match, using the provided compare function.
947  */
948 static inline dshash_table_item *
949 find_in_bucket(dshash_table *hash_table, const void *key,
950  dsa_pointer item_pointer)
951 {
952  while (DsaPointerIsValid(item_pointer))
953  {
954  dshash_table_item *item;
955 
956  item = dsa_get_address(hash_table->area, item_pointer);
957  if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
958  return item;
959  item_pointer = item->next;
960  }
961  return NULL;
962 }
963 
964 /*
965  * Insert an already-allocated item into a bucket.
966  */
967 static void
969  dsa_pointer item_pointer,
970  dshash_table_item *item,
971  dsa_pointer *bucket)
972 {
973  Assert(item == dsa_get_address(hash_table->area, item_pointer));
974 
975  item->next = *bucket;
976  *bucket = item_pointer;
977 }
978 
979 /*
980  * Allocate space for an entry with the given key and insert it into the
981  * provided bucket.
982  */
983 static dshash_table_item *
985  const void *key,
986  dsa_pointer *bucket)
987 {
988  dsa_pointer item_pointer;
989  dshash_table_item *item;
990 
991  item_pointer = dsa_allocate(hash_table->area,
992  hash_table->params.entry_size +
993  MAXALIGN(sizeof(dshash_table_item)));
994  item = dsa_get_address(hash_table->area, item_pointer);
995  copy_key(hash_table, ENTRY_FROM_ITEM(item), key);
996  insert_item_into_bucket(hash_table, item_pointer, item, bucket);
997  return item;
998 }
999 
1000 /*
1001  * Search a bucket for a matching key and delete it.
1002  */
1003 static bool
1005  const void *key,
1006  dsa_pointer *bucket_head)
1007 {
1008  while (DsaPointerIsValid(*bucket_head))
1009  {
1010  dshash_table_item *item;
1011 
1012  item = dsa_get_address(hash_table->area, *bucket_head);
1013 
1014  if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
1015  {
1016  dsa_pointer next;
1017 
1018  next = item->next;
1019  dsa_free(hash_table->area, *bucket_head);
1020  *bucket_head = next;
1021 
1022  return true;
1023  }
1024  bucket_head = &item->next;
1025  }
1026  return false;
1027 }
1028 
1029 /*
1030  * Delete the specified item from the bucket.
1031  */
1032 static bool
1034  dshash_table_item *item,
1035  dsa_pointer *bucket_head)
1036 {
1037  while (DsaPointerIsValid(*bucket_head))
1038  {
1039  dshash_table_item *bucket_item;
1040 
1041  bucket_item = dsa_get_address(hash_table->area, *bucket_head);
1042 
1043  if (bucket_item == item)
1044  {
1045  dsa_pointer next;
1046 
1047  next = item->next;
1048  dsa_free(hash_table->area, *bucket_head);
1049  *bucket_head = next;
1050  return true;
1051  }
1052  bucket_head = &bucket_item->next;
1053  }
1054  return false;
1055 }
1056 
1057 /*
1058  * Compute the hash value for a key.
1059  */
1060 static inline dshash_hash
1061 hash_key(dshash_table *hash_table, const void *key)
1062 {
1063  return hash_table->params.hash_function(key,
1064  hash_table->params.key_size,
1065  hash_table->arg);
1066 }
1067 
1068 /*
1069  * Check whether two keys compare equal.
1070  */
1071 static inline bool
1072 equal_keys(dshash_table *hash_table, const void *a, const void *b)
1073 {
1074  return hash_table->params.compare_function(a, b,
1075  hash_table->params.key_size,
1076  hash_table->arg) == 0;
1077 }
1078 
1079 /*
1080  * Copy a key.
1081  */
1082 static inline void
1083 copy_key(dshash_table *hash_table, void *dest, const void *src)
1084 {
1085  hash_table->params.copy_function(dest, src,
1086  hash_table->params.key_size,
1087  hash_table->arg);
1088 }
static int32 next
Definition: blutils.c:221
unsigned int uint32
Definition: c.h:506
#define MAXALIGN(LEN)
Definition: c.h:811
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:182
#define Assert(condition)
Definition: c.h:858
dsa_pointer dsa_allocate_extended(dsa_area *area, size_t size, int flags)
Definition: dsa.c:671
void * dsa_get_address(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:942
void dsa_free(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:826
#define dsa_allocate0(area, size)
Definition: dsa.h:113
uint64 dsa_pointer
Definition: dsa.h:62
#define dsa_allocate(area, size)
Definition: dsa.h:109
#define InvalidDsaPointer
Definition: dsa.h:78
#define DSA_ALLOC_NO_OOM
Definition: dsa.h:74
#define DsaPointerIsValid(x)
Definition: dsa.h:106
#define DSA_ALLOC_ZERO
Definition: dsa.h:75
bool dshash_delete_key(dshash_table *hash_table, const void *key)
Definition: dshash.c:503
void dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
Definition: dshash.c:590
void dshash_delete_entry(dshash_table *hash_table, void *entry)
Definition: dshash.c:541
void dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
Definition: dshash.c:622
void dshash_destroy(dshash_table *hash_table)
Definition: dshash.c:323
void dshash_release_lock(dshash_table *hash_table, void *entry)
Definition: dshash.c:558
#define PARTITION_LOCK(hash_table, i)
Definition: dshash.c:192
void dshash_detach(dshash_table *hash_table)
Definition: dshash.c:307
void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table, bool exclusive)
Definition: dshash.c:638
#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)
Definition: dshash.c:149
static bool delete_key_from_bucket(dshash_table *hash_table, const void *key, dsa_pointer *bucket_head)
Definition: dshash.c:1004
void * dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
Definition: dshash.c:390
dshash_hash dshash_strhash(const void *v, size_t size, void *arg)
Definition: dshash.c:611
static dshash_hash hash_key(dshash_table *hash_table, const void *key)
Definition: dshash.c:1061
#define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table)
Definition: dshash.c:195
#define ITEM_FROM_ENTRY(entry)
Definition: dshash.c:118
dshash_table_handle dshash_get_hash_table_handle(dshash_table *hash_table)
Definition: dshash.c:367
void dshash_dump(dshash_table *hash_table)
Definition: dshash.c:778
static bool equal_keys(dshash_table *hash_table, const void *a, const void *b)
Definition: dshash.c:1072
void dshash_seq_term(dshash_seq_status *status)
Definition: dshash.c:747
#define DSHASH_MAGIC
Definition: dshash.c:63
static void insert_item_into_bucket(dshash_table *hash_table, dsa_pointer item_pointer, dshash_table_item *item, dsa_pointer *bucket)
Definition: dshash.c:968
static bool delete_item_from_bucket(dshash_table *hash_table, dshash_table_item *item, dsa_pointer *bucket_head)
Definition: dshash.c:1033
#define DSHASH_NUM_PARTITIONS
Definition: dshash.c:60
int dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
Definition: dshash.c:599
#define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)
Definition: dshash.c:157
static dshash_table_item * insert_into_bucket(dshash_table *hash_table, const void *key, dsa_pointer *bucket)
Definition: dshash.c:984
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)
Definition: dshash.c:153
#define MAX_COUNT_PER_PARTITION(hash_table)
Definition: dshash.c:135
#define ENTRY_FROM_ITEM(item)
Definition: dshash.c:114
#define DSHASH_NUM_PARTITIONS_LOG2
Definition: dshash.c:59
static void delete_item(dshash_table *hash_table, dshash_table_item *item)
Definition: dshash.c:832
dshash_hash dshash_memhash(const void *v, size_t size, void *arg)
Definition: dshash.c:581
#define NUM_BUCKETS(size_log2)
Definition: dshash.c:127
static void ensure_valid_bucket_pointers(dshash_table *hash_table)
Definition: dshash.c:935
static dshash_table_item * find_in_bucket(dshash_table *hash_table, const void *key, dsa_pointer item_pointer)
Definition: dshash.c:949
void * dshash_find_or_insert(dshash_table *hash_table, const void *key, bool *found)
Definition: dshash.c:433
dshash_table * dshash_attach(dsa_area *area, const dshash_parameters *params, dshash_table_handle handle, void *arg)
Definition: dshash.c:270
static void resize(dshash_table *hash_table, size_t new_size_log2)
Definition: dshash.c:858
static void copy_key(dshash_table *hash_table, void *dest, const void *src)
Definition: dshash.c:1083
int dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
Definition: dshash.c:572
struct dshash_table_control dshash_table_control
dshash_table * dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
Definition: dshash.c:206
#define PARTITION_FOR_HASH(hash)
Definition: dshash.c:140
struct dshash_partition dshash_partition
void * dshash_seq_next(dshash_seq_status *status)
Definition: dshash.c:657
#define BUCKET_FOR_HASH(hash_table, hash)
Definition: dshash.c:161
void dshash_delete_current(dshash_seq_status *status)
Definition: dshash.c:757
dsa_pointer dshash_table_handle
Definition: dshash.h:24
uint32 dshash_hash
Definition: dshash.h:30
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
uint32 tag_hash(const void *key, Size keysize)
Definition: hashfn.c:677
uint32 string_hash(const void *key, Size keysize)
Definition: hashfn.c:660
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1895
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1170
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1939
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1783
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:709
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc(Size size)
Definition: mcxt.c:1316
void * arg
static int partitions
Definition: pgbench.c:223
#define fprintf
Definition: port.h:242
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
static pg_noinline void Size size
Definition: slab.c:607
Definition: lwlock.h:42
Definition: dsa.c:348
dshash_hash_function hash_function
Definition: dshash.h:59
size_t key_size
Definition: dshash.h:56
dshash_compare_function compare_function
Definition: dshash.h:58
dshash_copy_function copy_function
Definition: dshash.h:60
size_t entry_size
Definition: dshash.h:57
LWLock lock
Definition: dshash.c:75
size_t count
Definition: dshash.c:76
bool exclusive
Definition: dshash.h:80
dshash_table_item * curitem
Definition: dshash.h:77
dsa_pointer pnextitem
Definition: dshash.h:78
int curpartition
Definition: dshash.h:79
dshash_table * hash_table
Definition: dshash.h:74
size_t size_log2
Definition: dshash.c:96
dshash_partition partitions[DSHASH_NUM_PARTITIONS]
Definition: dshash.c:87
dshash_table_handle handle
Definition: dshash.c:85
dsa_pointer buckets
Definition: dshash.c:97
dshash_hash hash
Definition: dshash.c:49
dsa_pointer next
Definition: dshash.c:47
dshash_parameters params
Definition: dshash.c:106
dsa_pointer * buckets
Definition: dshash.c:109
dsa_area * area
Definition: dshash.c:105
void * arg
Definition: dshash.c:107
size_t size_log2
Definition: dshash.c:110
dshash_table_control * control
Definition: dshash.c:108