PostgreSQL Source Code  git master
dshash.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dshash.c
4  * Concurrent hash tables backed by dynamic shared memory areas.
5  *
6  * This is an open hashing hash table, with a linked list at each table
7  * entry. It supports dynamic resizing, as required to prevent the linked
8  * lists from growing too long on average. Currently, only growing is
9  * supported: the hash table never becomes smaller.
10  *
11  * To deal with concurrency, it has a fixed size set of partitions, each of
12  * which is independently locked. Each bucket maps to a partition; so insert,
13  * find and iterate operations normally only acquire one lock. Therefore,
14  * good concurrency is achieved whenever such operations don't collide at the
15  * lock partition level. However, when a resize operation begins, all
16  * partition locks must be acquired simultaneously for a brief period. This
17  * is only expected to happen a small number of times until a stable size is
18  * found, since growth is geometric.
19  *
20  * Future versions may support iterators and incremental resizing; for now
21  * the implementation is minimalist.
22  *
23  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
24  * Portions Copyright (c) 1994, Regents of the University of California
25  *
26  * IDENTIFICATION
27  * src/backend/lib/dshash.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include "common/hashfn.h"
35 #include "lib/dshash.h"
36 #include "storage/ipc.h"
37 #include "storage/lwlock.h"
38 #include "utils/dsa.h"
39 #include "utils/memutils.h"
40 
41 /*
42  * An item in the hash table. This wraps the user's entry object in an
43  * envelop that holds a pointer back to the bucket and a pointer to the next
44  * item in the bucket.
45  */
47 {
48  /* The next item in the same bucket. */
50  /* The hashed key, to avoid having to recompute it. */
52  /* The user's entry object follows here. See ENTRY_FROM_ITEM(item). */
53 };
54 
55 /*
56  * The number of partitions for locking purposes. This is set to match
57  * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
58  * the buffer pool must be good enough for any other purpose. This could
59  * become a runtime parameter in future.
60  */
61 #define DSHASH_NUM_PARTITIONS_LOG2 7
62 #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
63 
64 /* A magic value used to identify our hash tables. */
65 #define DSHASH_MAGIC 0x75ff6a20
66 
67 /*
68  * Tracking information for each lock partition. Initially, each partition
69  * corresponds to one bucket, but each time the hash table grows, the buckets
70  * covered by each partition split so the number of buckets covered doubles.
71  *
72  * We might want to add padding here so that each partition is on a different
73  * cache line, but doing so would bloat this structure considerably.
74  */
75 typedef struct dshash_partition
76 {
77  LWLock lock; /* Protects all buckets in this partition. */
78  size_t count; /* # of items in this partition's buckets */
80 
81 /*
82  * The head object for a hash table. This will be stored in dynamic shared
83  * memory.
84  */
85 typedef struct dshash_table_control
86 {
91 
92  /*
93  * The following members are written to only when ALL partitions locks are
94  * held. They can be read when any one partition lock is held.
95  */
96 
97  /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
98  size_t size_log2; /* log2(number of buckets) */
99  dsa_pointer buckets; /* current bucket array */
101 
102 /*
103  * Per-backend state for a dynamic hash table.
104  */
106 {
107  dsa_area *area; /* Backing dynamic shared memory area. */
108  dshash_parameters params; /* Parameters. */
109  void *arg; /* User-supplied data pointer. */
110  dshash_table_control *control; /* Control object in DSM. */
111  dsa_pointer *buckets; /* Current bucket pointers in DSM. */
112  size_t size_log2; /* log2(number of buckets) */
113 };
114 
115 /* Given a pointer to an item, find the entry (user data) it holds. */
116 #define ENTRY_FROM_ITEM(item) \
117  ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
118 
119 /* Given a pointer to an entry, find the item that holds it. */
120 #define ITEM_FROM_ENTRY(entry) \
121  ((dshash_table_item *)((char *)(entry) - \
122  MAXALIGN(sizeof(dshash_table_item))))
123 
124 /* How many resize operations (bucket splits) have there been? */
125 #define NUM_SPLITS(size_log2) \
126  (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
127 
128 /* How many buckets are there in a given size? */
129 #define NUM_BUCKETS(size_log2) \
130  (((size_t) 1) << (size_log2))
131 
132 /* How many buckets are there in each partition at a given size? */
133 #define BUCKETS_PER_PARTITION(size_log2) \
134  (((size_t) 1) << NUM_SPLITS(size_log2))
135 
136 /* Max entries before we need to grow. Half + quarter = 75% load factor. */
137 #define MAX_COUNT_PER_PARTITION(hash_table) \
138  (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
139  BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
140 
141 /* Choose partition based on the highest order bits of the hash. */
142 #define PARTITION_FOR_HASH(hash) \
143  (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
144 
145 /*
146  * Find the bucket index for a given hash and table size. Each time the table
147  * doubles in size, the appropriate bucket for a given hash value doubles and
148  * possibly adds one, depending on the newly revealed bit, so that all buckets
149  * are split.
150  */
151 #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2) \
152  (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
153 
154 /* The index of the first bucket in a given partition. */
155 #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
156  ((partition) << NUM_SPLITS(size_log2))
157 
158 /* Choose partition based on bucket index. */
159 #define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2) \
160  ((bucket_idx) >> NUM_SPLITS(size_log2))
161 
162 /* The head of the active bucket for a given hash value (lvalue). */
163 #define BUCKET_FOR_HASH(hash_table, hash) \
164  (hash_table->buckets[ \
165  BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, \
166  hash_table->size_log2)])
167 
168 static void delete_item(dshash_table *hash_table,
169  dshash_table_item *item);
170 static void resize(dshash_table *hash_table, size_t new_size_log2);
171 static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
172 static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
173  const void *key,
174  dsa_pointer item_pointer);
175 static void insert_item_into_bucket(dshash_table *hash_table,
176  dsa_pointer item_pointer,
177  dshash_table_item *item,
178  dsa_pointer *bucket);
180  const void *key,
181  dsa_pointer *bucket);
182 static bool delete_key_from_bucket(dshash_table *hash_table,
183  const void *key,
184  dsa_pointer *bucket_head);
185 static bool delete_item_from_bucket(dshash_table *hash_table,
186  dshash_table_item *item,
187  dsa_pointer *bucket_head);
188 static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
189 static inline bool equal_keys(dshash_table *hash_table,
190  const void *a, const void *b);
191 
192 #define PARTITION_LOCK(hash_table, i) \
193  (&(hash_table)->control->partitions[(i)].lock)
194 
195 #define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
196  Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
197  DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
198 
199 /*
200  * Create a new hash table backed by the given dynamic shared area, with the
201  * given parameters. The returned object is allocated in backend-local memory
202  * using the current MemoryContext. 'arg' will be passed through to the
203  * compare and hash functions.
204  */
205 dshash_table *
206 dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
207 {
208  dshash_table *hash_table;
209  dsa_pointer control;
210 
211  /* Allocate the backend-local object representing the hash table. */
212  hash_table = palloc(sizeof(dshash_table));
213 
214  /* Allocate the control object in shared memory. */
215  control = dsa_allocate(area, sizeof(dshash_table_control));
216 
217  /* Set up the local and shared hash table structs. */
218  hash_table->area = area;
219  hash_table->params = *params;
220  hash_table->arg = arg;
221  hash_table->control = dsa_get_address(area, control);
222  hash_table->control->handle = control;
223  hash_table->control->magic = DSHASH_MAGIC;
224  hash_table->control->lwlock_tranche_id = params->tranche_id;
225 
226  /* Set up the array of lock partitions. */
227  {
229  int tranche_id = hash_table->control->lwlock_tranche_id;
230  int i;
231 
232  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
233  {
234  LWLockInitialize(&partitions[i].lock, tranche_id);
235  partitions[i].count = 0;
236  }
237  }
238 
239  /*
240  * Set up the initial array of buckets. Our initial size is the same as
241  * the number of partitions.
242  */
244  hash_table->control->buckets =
248  if (!DsaPointerIsValid(hash_table->control->buckets))
249  {
250  dsa_free(area, control);
251  ereport(ERROR,
252  (errcode(ERRCODE_OUT_OF_MEMORY),
253  errmsg("out of memory"),
254  errdetail("Failed on DSA request of size %zu.",
255  sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
256  }
257  hash_table->buckets = dsa_get_address(area,
258  hash_table->control->buckets);
259  hash_table->size_log2 = hash_table->control->size_log2;
260 
261  return hash_table;
262 }
263 
264 /*
265  * Attach to an existing hash table using a handle. The returned object is
266  * allocated in backend-local memory using the current MemoryContext. 'arg'
267  * will be passed through to the compare and hash functions.
268  */
269 dshash_table *
271  dshash_table_handle handle, void *arg)
272 {
273  dshash_table *hash_table;
274  dsa_pointer control;
275 
276  /* Allocate the backend-local object representing the hash table. */
277  hash_table = palloc(sizeof(dshash_table));
278 
279  /* Find the control object in shared memory. */
280  control = handle;
281 
282  /* Set up the local hash table struct. */
283  hash_table->area = area;
284  hash_table->params = *params;
285  hash_table->arg = arg;
286  hash_table->control = dsa_get_address(area, control);
287  Assert(hash_table->control->magic == DSHASH_MAGIC);
288 
289  /*
290  * These will later be set to the correct values by
291  * ensure_valid_bucket_pointers(), at which time we'll be holding a
292  * partition lock for interlocking against concurrent resizing.
293  */
294  hash_table->buckets = NULL;
295  hash_table->size_log2 = 0;
296 
297  return hash_table;
298 }
299 
300 /*
301  * Detach from a hash table. This frees backend-local resources associated
302  * with the hash table, but the hash table will continue to exist until it is
303  * either explicitly destroyed (by a backend that is still attached to it), or
304  * the area that backs it is returned to the operating system.
305  */
306 void
308 {
310 
311  /* The hash table may have been destroyed. Just free local memory. */
312  pfree(hash_table);
313 }
314 
315 /*
316  * Destroy a hash table, returning all memory to the area. The caller must be
317  * certain that no other backend will attempt to access the hash table before
318  * calling this function. Other backend must explicitly call dshash_detach to
319  * free up backend-local memory associated with the hash table. The backend
320  * that calls dshash_destroy must not call dshash_detach.
321  */
322 void
324 {
325  size_t size;
326  size_t i;
327 
328  Assert(hash_table->control->magic == DSHASH_MAGIC);
329  ensure_valid_bucket_pointers(hash_table);
330 
331  /* Free all the entries. */
332  size = NUM_BUCKETS(hash_table->size_log2);
333  for (i = 0; i < size; ++i)
334  {
335  dsa_pointer item_pointer = hash_table->buckets[i];
336 
337  while (DsaPointerIsValid(item_pointer))
338  {
339  dshash_table_item *item;
340  dsa_pointer next_item_pointer;
341 
342  item = dsa_get_address(hash_table->area, item_pointer);
343  next_item_pointer = item->next;
344  dsa_free(hash_table->area, item_pointer);
345  item_pointer = next_item_pointer;
346  }
347  }
348 
349  /*
350  * Vandalize the control block to help catch programming errors where
351  * other backends access the memory formerly occupied by this hash table.
352  */
353  hash_table->control->magic = 0;
354 
355  /* Free the active table and control object. */
356  dsa_free(hash_table->area, hash_table->control->buckets);
357  dsa_free(hash_table->area, hash_table->control->handle);
358 
359  pfree(hash_table);
360 }
361 
362 /*
363  * Get a handle that can be used by other processes to attach to this hash
364  * table.
365  */
368 {
369  Assert(hash_table->control->magic == DSHASH_MAGIC);
370 
371  return hash_table->control->handle;
372 }
373 
374 /*
375  * Look up an entry, given a key. Returns a pointer to an entry if one can be
376  * found with the given key. Returns NULL if the key is not found. If a
377  * non-NULL value is returned, the entry is locked and must be released by
378  * calling dshash_release_lock. If an error is raised before
379  * dshash_release_lock is called, the lock will be released automatically, but
380  * the caller must take care to ensure that the entry is not left corrupted.
381  * The lock mode is either shared or exclusive depending on 'exclusive'.
382  *
383  * The caller must not hold a lock already.
384  *
385  * Note that the lock held is in fact an LWLock, so interrupts will be held on
386  * return from this function, and not resumed until dshash_release_lock is
387  * called. It is a very good idea for the caller to release the lock quickly.
388  */
389 void *
390 dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
391 {
393  size_t partition;
394  dshash_table_item *item;
395 
396  hash = hash_key(hash_table, key);
397  partition = PARTITION_FOR_HASH(hash);
398 
399  Assert(hash_table->control->magic == DSHASH_MAGIC);
401 
402  LWLockAcquire(PARTITION_LOCK(hash_table, partition),
403  exclusive ? LW_EXCLUSIVE : LW_SHARED);
404  ensure_valid_bucket_pointers(hash_table);
405 
406  /* Search the active bucket. */
407  item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
408 
409  if (!item)
410  {
411  /* Not found. */
412  LWLockRelease(PARTITION_LOCK(hash_table, partition));
413  return NULL;
414  }
415  else
416  {
417  /* The caller will free the lock by calling dshash_release_lock. */
418  return ENTRY_FROM_ITEM(item);
419  }
420 }
421 
422 /*
423  * Returns a pointer to an exclusively locked item which must be released with
424  * dshash_release_lock. If the key is found in the hash table, 'found' is set
425  * to true and a pointer to the existing entry is returned. If the key is not
426  * found, 'found' is set to false, and a pointer to a newly created entry is
427  * returned.
428  *
429  * Notes above dshash_find() regarding locking and error handling equally
430  * apply here.
431  */
432 void *
434  const void *key,
435  bool *found)
436 {
438  size_t partition_index;
439  dshash_partition *partition;
440  dshash_table_item *item;
441 
442  hash = hash_key(hash_table, key);
443  partition_index = PARTITION_FOR_HASH(hash);
444  partition = &hash_table->control->partitions[partition_index];
445 
446  Assert(hash_table->control->magic == DSHASH_MAGIC);
448 
449 restart:
450  LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
451  LW_EXCLUSIVE);
452  ensure_valid_bucket_pointers(hash_table);
453 
454  /* Search the active bucket. */
455  item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
456 
457  if (item)
458  *found = true;
459  else
460  {
461  *found = false;
462 
463  /* Check if we are getting too full. */
464  if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
465  {
466  /*
467  * The load factor (= keys / buckets) for all buckets protected by
468  * this partition is > 0.75. Presumably the same applies
469  * generally across the whole hash table (though we don't attempt
470  * to track that directly to avoid contention on some kind of
471  * central counter; we just assume that this partition is
472  * representative). This is a good time to resize.
473  *
474  * Give up our existing lock first, because resizing needs to
475  * reacquire all the locks in the right order to avoid deadlocks.
476  */
477  LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
478  resize(hash_table, hash_table->size_log2 + 1);
479 
480  goto restart;
481  }
482 
483  /* Finally we can try to insert the new item. */
484  item = insert_into_bucket(hash_table, key,
485  &BUCKET_FOR_HASH(hash_table, hash));
486  item->hash = hash;
487  /* Adjust per-lock-partition counter for load factor knowledge. */
488  ++partition->count;
489  }
490 
491  /* The caller must release the lock with dshash_release_lock. */
492  return ENTRY_FROM_ITEM(item);
493 }
494 
495 /*
496  * Remove an entry by key. Returns true if the key was found and the
497  * corresponding entry was removed.
498  *
499  * To delete an entry that you already have a pointer to, see
500  * dshash_delete_entry.
501  */
502 bool
503 dshash_delete_key(dshash_table *hash_table, const void *key)
504 {
506  size_t partition;
507  bool found;
508 
509  Assert(hash_table->control->magic == DSHASH_MAGIC);
511 
512  hash = hash_key(hash_table, key);
513  partition = PARTITION_FOR_HASH(hash);
514 
515  LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
516  ensure_valid_bucket_pointers(hash_table);
517 
518  if (delete_key_from_bucket(hash_table, key,
519  &BUCKET_FOR_HASH(hash_table, hash)))
520  {
521  Assert(hash_table->control->partitions[partition].count > 0);
522  found = true;
523  --hash_table->control->partitions[partition].count;
524  }
525  else
526  found = false;
527 
528  LWLockRelease(PARTITION_LOCK(hash_table, partition));
529 
530  return found;
531 }
532 
533 /*
534  * Remove an entry. The entry must already be exclusively locked, and must
535  * have been obtained by dshash_find or dshash_find_or_insert. Note that this
536  * function releases the lock just like dshash_release_lock.
537  *
538  * To delete an entry by key, see dshash_delete_key.
539  */
540 void
541 dshash_delete_entry(dshash_table *hash_table, void *entry)
542 {
543  dshash_table_item *item = ITEM_FROM_ENTRY(entry);
544  size_t partition = PARTITION_FOR_HASH(item->hash);
545 
546  Assert(hash_table->control->magic == DSHASH_MAGIC);
547  Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
548  LW_EXCLUSIVE));
549 
550  delete_item(hash_table, item);
551  LWLockRelease(PARTITION_LOCK(hash_table, partition));
552 }
553 
554 /*
555  * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
556  */
557 void
558 dshash_release_lock(dshash_table *hash_table, void *entry)
559 {
560  dshash_table_item *item = ITEM_FROM_ENTRY(entry);
561  size_t partition_index = PARTITION_FOR_HASH(item->hash);
562 
563  Assert(hash_table->control->magic == DSHASH_MAGIC);
564 
565  LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
566 }
567 
568 /*
569  * A compare function that forwards to memcmp.
570  */
571 int
572 dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
573 {
574  return memcmp(a, b, size);
575 }
576 
577 /*
578  * A hash function that forwards to tag_hash.
579  */
581 dshash_memhash(const void *v, size_t size, void *arg)
582 {
583  return tag_hash(v, size);
584 }
585 
586 /*
587  * Sequentially scan through dshash table and return all the elements one by
588  * one, return NULL when all elements have been returned.
589  *
590  * dshash_seq_term needs to be called when a scan finished. The caller may
591  * delete returned elements midst of a scan by using dshash_delete_current()
592  * if exclusive = true.
593  */
594 void
596  bool exclusive)
597 {
598  status->hash_table = hash_table;
599  status->curbucket = 0;
600  status->nbuckets = 0;
601  status->curitem = NULL;
602  status->pnextitem = InvalidDsaPointer;
603  status->curpartition = -1;
604  status->exclusive = exclusive;
605 }
606 
607 /*
608  * Returns the next element.
609  *
610  * Returned elements are locked and the caller may not release the lock. It is
611  * released by future calls to dshash_seq_next() or dshash_seq_term().
612  */
613 void *
615 {
616  dsa_pointer next_item_pointer;
617 
618  /*
619  * Not yet holding any partition locks. Need to determine the size of the
620  * hash table, it could have been resized since we were looking last.
621  * Since we iterate in partition order, we can start by unconditionally
622  * lock partition 0.
623  *
624  * Once we hold the lock, no resizing can happen until the scan ends. So
625  * we don't need to repeatedly call ensure_valid_bucket_pointers().
626  */
627  if (status->curpartition == -1)
628  {
629  Assert(status->curbucket == 0);
631 
632  status->curpartition = 0;
633 
635  status->curpartition),
636  status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
637 
639 
640  status->nbuckets =
642  next_item_pointer = status->hash_table->buckets[status->curbucket];
643  }
644  else
645  next_item_pointer = status->pnextitem;
646 
648  status->curpartition),
649  status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
650 
651  /* Move to the next bucket if we finished the current bucket */
652  while (!DsaPointerIsValid(next_item_pointer))
653  {
654  int next_partition;
655 
656  if (++status->curbucket >= status->nbuckets)
657  {
658  /* all buckets have been scanned. finish. */
659  return NULL;
660  }
661 
662  /* Check if move to the next partition */
663  next_partition =
665  status->hash_table->size_log2);
666 
667  if (status->curpartition != next_partition)
668  {
669  /*
670  * Move to the next partition. Lock the next partition then
671  * release the current, not in the reverse order to avoid
672  * concurrent resizing. Avoid dead lock by taking lock in the
673  * same order with resize().
674  */
676  next_partition),
677  status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
679  status->curpartition));
680  status->curpartition = next_partition;
681  }
682 
683  next_item_pointer = status->hash_table->buckets[status->curbucket];
684  }
685 
686  status->curitem =
687  dsa_get_address(status->hash_table->area, next_item_pointer);
688 
689  /*
690  * The caller may delete the item. Store the next item in case of
691  * deletion.
692  */
693  status->pnextitem = status->curitem->next;
694 
695  return ENTRY_FROM_ITEM(status->curitem);
696 }
697 
698 /*
699  * Terminates the seqscan and release all locks.
700  *
701  * Needs to be called after finishing or when exiting a seqscan.
702  */
703 void
705 {
706  if (status->curpartition >= 0)
708 }
709 
710 /*
711  * Remove the current entry of the seq scan.
712  */
713 void
715 {
716  dshash_table *hash_table = status->hash_table;
717  dshash_table_item *item = status->curitem;
718  size_t partition PG_USED_FOR_ASSERTS_ONLY;
719 
720  partition = PARTITION_FOR_HASH(item->hash);
721 
722  Assert(status->exclusive);
723  Assert(hash_table->control->magic == DSHASH_MAGIC);
724  Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
725  LW_EXCLUSIVE));
726 
727  delete_item(hash_table, item);
728 }
729 
730 /*
731  * Print debugging information about the internal state of the hash table to
732  * stderr. The caller must hold no partition locks.
733  */
734 void
736 {
737  size_t i;
738  size_t j;
739 
740  Assert(hash_table->control->magic == DSHASH_MAGIC);
742 
743  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
744  {
745  Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
746  LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
747  }
748 
749  ensure_valid_bucket_pointers(hash_table);
750 
751  fprintf(stderr,
752  "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
753  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
754  {
755  dshash_partition *partition = &hash_table->control->partitions[i];
756  size_t begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
757  size_t end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
758 
759  fprintf(stderr, " partition %zu\n", i);
760  fprintf(stderr,
761  " active buckets (key count = %zu)\n", partition->count);
762 
763  for (j = begin; j < end; ++j)
764  {
765  size_t count = 0;
766  dsa_pointer bucket = hash_table->buckets[j];
767 
768  while (DsaPointerIsValid(bucket))
769  {
770  dshash_table_item *item;
771 
772  item = dsa_get_address(hash_table->area, bucket);
773 
774  bucket = item->next;
775  ++count;
776  }
777  fprintf(stderr, " bucket %zu (key count = %zu)\n", j, count);
778  }
779  }
780 
781  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
782  LWLockRelease(PARTITION_LOCK(hash_table, i));
783 }
784 
785 /*
786  * Delete a locked item to which we have a pointer.
787  */
788 static void
790 {
791  size_t hash = item->hash;
792  size_t partition = PARTITION_FOR_HASH(hash);
793 
794  Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
795 
796  if (delete_item_from_bucket(hash_table, item,
797  &BUCKET_FOR_HASH(hash_table, hash)))
798  {
799  Assert(hash_table->control->partitions[partition].count > 0);
800  --hash_table->control->partitions[partition].count;
801  }
802  else
803  {
804  Assert(false);
805  }
806 }
807 
808 /*
809  * Grow the hash table if necessary to the requested number of buckets. The
810  * requested size must be double some previously observed size.
811  *
812  * Must be called without any partition lock held.
813  */
814 static void
815 resize(dshash_table *hash_table, size_t new_size_log2)
816 {
817  dsa_pointer old_buckets;
818  dsa_pointer new_buckets_shared;
819  dsa_pointer *new_buckets;
820  size_t size;
821  size_t new_size = ((size_t) 1) << new_size_log2;
822  size_t i;
823 
824  /*
825  * Acquire the locks for all lock partitions. This is expensive, but we
826  * shouldn't have to do it many times.
827  */
828  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
829  {
830  Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
831 
833  if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
834  {
835  /*
836  * Another backend has already increased the size; we can avoid
837  * obtaining all the locks and return early.
838  */
839  LWLockRelease(PARTITION_LOCK(hash_table, 0));
840  return;
841  }
842  }
843 
844  Assert(new_size_log2 == hash_table->control->size_log2 + 1);
845 
846  /* Allocate the space for the new table. */
847  new_buckets_shared = dsa_allocate0(hash_table->area,
848  sizeof(dsa_pointer) * new_size);
849  new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
850 
851  /*
852  * We've allocated the new bucket array; all that remains to do now is to
853  * reinsert all items, which amounts to adjusting all the pointers.
854  */
855  size = ((size_t) 1) << hash_table->control->size_log2;
856  for (i = 0; i < size; ++i)
857  {
858  dsa_pointer item_pointer = hash_table->buckets[i];
859 
860  while (DsaPointerIsValid(item_pointer))
861  {
862  dshash_table_item *item;
863  dsa_pointer next_item_pointer;
864 
865  item = dsa_get_address(hash_table->area, item_pointer);
866  next_item_pointer = item->next;
867  insert_item_into_bucket(hash_table, item_pointer, item,
868  &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
869  new_size_log2)]);
870  item_pointer = next_item_pointer;
871  }
872  }
873 
874  /* Swap the hash table into place and free the old one. */
875  old_buckets = hash_table->control->buckets;
876  hash_table->control->buckets = new_buckets_shared;
877  hash_table->control->size_log2 = new_size_log2;
878  hash_table->buckets = new_buckets;
879  dsa_free(hash_table->area, old_buckets);
880 
881  /* Release all the locks. */
882  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
883  LWLockRelease(PARTITION_LOCK(hash_table, i));
884 }
885 
886 /*
887  * Make sure that our backend-local bucket pointers are up to date. The
888  * caller must have locked one lock partition, which prevents resize() from
889  * running concurrently.
890  */
891 static inline void
893 {
894  if (hash_table->size_log2 != hash_table->control->size_log2)
895  {
896  hash_table->buckets = dsa_get_address(hash_table->area,
897  hash_table->control->buckets);
898  hash_table->size_log2 = hash_table->control->size_log2;
899  }
900 }
901 
902 /*
903  * Scan a locked bucket for a match, using the provided compare function.
904  */
905 static inline dshash_table_item *
906 find_in_bucket(dshash_table *hash_table, const void *key,
907  dsa_pointer item_pointer)
908 {
909  while (DsaPointerIsValid(item_pointer))
910  {
911  dshash_table_item *item;
912 
913  item = dsa_get_address(hash_table->area, item_pointer);
914  if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
915  return item;
916  item_pointer = item->next;
917  }
918  return NULL;
919 }
920 
921 /*
922  * Insert an already-allocated item into a bucket.
923  */
924 static void
926  dsa_pointer item_pointer,
927  dshash_table_item *item,
928  dsa_pointer *bucket)
929 {
930  Assert(item == dsa_get_address(hash_table->area, item_pointer));
931 
932  item->next = *bucket;
933  *bucket = item_pointer;
934 }
935 
936 /*
937  * Allocate space for an entry with the given key and insert it into the
938  * provided bucket.
939  */
940 static dshash_table_item *
942  const void *key,
943  dsa_pointer *bucket)
944 {
945  dsa_pointer item_pointer;
946  dshash_table_item *item;
947 
948  item_pointer = dsa_allocate(hash_table->area,
949  hash_table->params.entry_size +
950  MAXALIGN(sizeof(dshash_table_item)));
951  item = dsa_get_address(hash_table->area, item_pointer);
952  memcpy(ENTRY_FROM_ITEM(item), key, hash_table->params.key_size);
953  insert_item_into_bucket(hash_table, item_pointer, item, bucket);
954  return item;
955 }
956 
957 /*
958  * Search a bucket for a matching key and delete it.
959  */
960 static bool
962  const void *key,
963  dsa_pointer *bucket_head)
964 {
965  while (DsaPointerIsValid(*bucket_head))
966  {
967  dshash_table_item *item;
968 
969  item = dsa_get_address(hash_table->area, *bucket_head);
970 
971  if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
972  {
974 
975  next = item->next;
976  dsa_free(hash_table->area, *bucket_head);
977  *bucket_head = next;
978 
979  return true;
980  }
981  bucket_head = &item->next;
982  }
983  return false;
984 }
985 
986 /*
987  * Delete the specified item from the bucket.
988  */
989 static bool
991  dshash_table_item *item,
992  dsa_pointer *bucket_head)
993 {
994  while (DsaPointerIsValid(*bucket_head))
995  {
996  dshash_table_item *bucket_item;
997 
998  bucket_item = dsa_get_address(hash_table->area, *bucket_head);
999 
1000  if (bucket_item == item)
1001  {
1002  dsa_pointer next;
1003 
1004  next = item->next;
1005  dsa_free(hash_table->area, *bucket_head);
1006  *bucket_head = next;
1007  return true;
1008  }
1009  bucket_head = &bucket_item->next;
1010  }
1011  return false;
1012 }
1013 
1014 /*
1015  * Compute the hash value for a key.
1016  */
1017 static inline dshash_hash
1018 hash_key(dshash_table *hash_table, const void *key)
1019 {
1020  return hash_table->params.hash_function(key,
1021  hash_table->params.key_size,
1022  hash_table->arg);
1023 }
1024 
1025 /*
1026  * Check whether two keys compare equal.
1027  */
1028 static inline bool
1029 equal_keys(dshash_table *hash_table, const void *a, const void *b)
1030 {
1031  return hash_table->params.compare_function(a, b,
1032  hash_table->params.key_size,
1033  hash_table->arg) == 0;
1034 }
static int32 next
Definition: blutils.c:219
unsigned int uint32
Definition: c.h:495
#define MAXALIGN(LEN)
Definition: c.h:800
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:171
dsa_pointer dsa_allocate_extended(dsa_area *area, size_t size, int flags)
Definition: dsa.c:678
void * dsa_get_address(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:949
void dsa_free(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:833
#define dsa_allocate0(area, size)
Definition: dsa.h:88
uint64 dsa_pointer
Definition: dsa.h:62
#define dsa_allocate(area, size)
Definition: dsa.h:84
#define InvalidDsaPointer
Definition: dsa.h:78
#define DSA_ALLOC_NO_OOM
Definition: dsa.h:74
#define DsaPointerIsValid(x)
Definition: dsa.h:81
#define DSA_ALLOC_ZERO
Definition: dsa.h:75
bool dshash_delete_key(dshash_table *hash_table, const void *key)
Definition: dshash.c:503
void dshash_delete_entry(dshash_table *hash_table, void *entry)
Definition: dshash.c:541
void dshash_destroy(dshash_table *hash_table)
Definition: dshash.c:323
void dshash_release_lock(dshash_table *hash_table, void *entry)
Definition: dshash.c:558
#define PARTITION_LOCK(hash_table, i)
Definition: dshash.c:192
void dshash_detach(dshash_table *hash_table)
Definition: dshash.c:307
void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table, bool exclusive)
Definition: dshash.c:595
#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)
Definition: dshash.c:151
static bool delete_key_from_bucket(dshash_table *hash_table, const void *key, dsa_pointer *bucket_head)
Definition: dshash.c:961
void * dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
Definition: dshash.c:390
static dshash_hash hash_key(dshash_table *hash_table, const void *key)
Definition: dshash.c:1018
#define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table)
Definition: dshash.c:195
#define ITEM_FROM_ENTRY(entry)
Definition: dshash.c:120
dshash_table_handle dshash_get_hash_table_handle(dshash_table *hash_table)
Definition: dshash.c:367
void dshash_dump(dshash_table *hash_table)
Definition: dshash.c:735
static bool equal_keys(dshash_table *hash_table, const void *a, const void *b)
Definition: dshash.c:1029
void dshash_seq_term(dshash_seq_status *status)
Definition: dshash.c:704
#define DSHASH_MAGIC
Definition: dshash.c:65
static void insert_item_into_bucket(dshash_table *hash_table, dsa_pointer item_pointer, dshash_table_item *item, dsa_pointer *bucket)
Definition: dshash.c:925
static bool delete_item_from_bucket(dshash_table *hash_table, dshash_table_item *item, dsa_pointer *bucket_head)
Definition: dshash.c:990
#define DSHASH_NUM_PARTITIONS
Definition: dshash.c:62
#define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)
Definition: dshash.c:159
static dshash_table_item * insert_into_bucket(dshash_table *hash_table, const void *key, dsa_pointer *bucket)
Definition: dshash.c:941
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)
Definition: dshash.c:155
#define MAX_COUNT_PER_PARTITION(hash_table)
Definition: dshash.c:137
#define ENTRY_FROM_ITEM(item)
Definition: dshash.c:116
#define DSHASH_NUM_PARTITIONS_LOG2
Definition: dshash.c:61
static void delete_item(dshash_table *hash_table, dshash_table_item *item)
Definition: dshash.c:789
dshash_hash dshash_memhash(const void *v, size_t size, void *arg)
Definition: dshash.c:581
#define NUM_BUCKETS(size_log2)
Definition: dshash.c:129
static void ensure_valid_bucket_pointers(dshash_table *hash_table)
Definition: dshash.c:892
static dshash_table_item * find_in_bucket(dshash_table *hash_table, const void *key, dsa_pointer item_pointer)
Definition: dshash.c:906
void * dshash_find_or_insert(dshash_table *hash_table, const void *key, bool *found)
Definition: dshash.c:433
dshash_table * dshash_attach(dsa_area *area, const dshash_parameters *params, dshash_table_handle handle, void *arg)
Definition: dshash.c:270
static void resize(dshash_table *hash_table, size_t new_size_log2)
Definition: dshash.c:815
int dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
Definition: dshash.c:572
struct dshash_table_control dshash_table_control
dshash_table * dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
Definition: dshash.c:206
#define PARTITION_FOR_HASH(hash)
Definition: dshash.c:142
struct dshash_partition dshash_partition
void * dshash_seq_next(dshash_seq_status *status)
Definition: dshash.c:614
#define BUCKET_FOR_HASH(hash_table, hash)
Definition: dshash.c:163
void dshash_delete_current(dshash_seq_status *status)
Definition: dshash.c:714
dsa_pointer dshash_table_handle
Definition: dshash.h:24
uint32 dshash_hash
Definition: dshash.h:30
int errdetail(const char *fmt,...)
Definition: elog.c:1202
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
uint32 tag_hash(const void *key, Size keysize)
Definition: hashfn.c:677
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1920
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1195
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1964
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1808
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:730
@ LW_SHARED
Definition: lwlock.h:117
@ LW_EXCLUSIVE
Definition: lwlock.h:116
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc(Size size)
Definition: mcxt.c:1226
void * arg
static int partitions
Definition: pgbench.c:223
#define fprintf
Definition: port.h:242
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
Definition: lwlock.h:41
Definition: dsa.c:367
dshash_hash_function hash_function
Definition: dshash.h:57
size_t key_size
Definition: dshash.h:54
dshash_compare_function compare_function
Definition: dshash.h:56
size_t entry_size
Definition: dshash.h:55
LWLock lock
Definition: dshash.c:77
size_t count
Definition: dshash.c:78
bool exclusive
Definition: dshash.h:77
dshash_table_item * curitem
Definition: dshash.h:74
dsa_pointer pnextitem
Definition: dshash.h:75
int curpartition
Definition: dshash.h:76
dshash_table * hash_table
Definition: dshash.h:71
size_t size_log2
Definition: dshash.c:98
dshash_partition partitions[DSHASH_NUM_PARTITIONS]
Definition: dshash.c:89
dshash_table_handle handle
Definition: dshash.c:87
dsa_pointer buckets
Definition: dshash.c:99
dshash_hash hash
Definition: dshash.c:51
dsa_pointer next
Definition: dshash.c:49
dshash_parameters params
Definition: dshash.c:108
dsa_pointer * buckets
Definition: dshash.c:111
dsa_area * area
Definition: dshash.c:107
void * arg
Definition: dshash.c:109
size_t size_log2
Definition: dshash.c:112
dshash_table_control * control
Definition: dshash.c:110