PostgreSQL Source Code  git master
dshash.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dshash.c
4  * Concurrent hash tables backed by dynamic shared memory areas.
5  *
6  * This is an open hashing hash table, with a linked list at each table
7  * entry. It supports dynamic resizing, as required to prevent the linked
8  * lists from growing too long on average. Currently, only growing is
9  * supported: the hash table never becomes smaller.
10  *
11  * To deal with concurrency, it has a fixed size set of partitions, each of
12  * which is independently locked. Each bucket maps to a partition; so insert,
13  * find and iterate operations normally only acquire one lock. Therefore,
14  * good concurrency is achieved whenever such operations don't collide at the
15  * lock partition level. However, when a resize operation begins, all
16  * partition locks must be acquired simultaneously for a brief period. This
17  * is only expected to happen a small number of times until a stable size is
18  * found, since growth is geometric.
19  *
20  * Future versions may support iterators and incremental resizing; for now
21  * the implementation is minimalist.
22  *
23  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
24  * Portions Copyright (c) 1994, Regents of the University of California
25  *
26  * IDENTIFICATION
27  * src/backend/lib/dshash.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include "common/hashfn.h"
35 #include "lib/dshash.h"
36 #include "storage/ipc.h"
37 #include "storage/lwlock.h"
38 #include "utils/dsa.h"
39 #include "utils/memutils.h"
40 
41 /*
42  * An item in the hash table. This wraps the user's entry object in an
43  * envelop that holds a pointer back to the bucket and a pointer to the next
44  * item in the bucket.
45  */
47 {
48  /* The next item in the same bucket. */
50  /* The hashed key, to avoid having to recompute it. */
52  /* The user's entry object follows here. See ENTRY_FROM_ITEM(item). */
53 };
54 
55 /*
56  * The number of partitions for locking purposes. This is set to match
57  * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
58  * the buffer pool must be good enough for any other purpose. This could
59  * become a runtime parameter in future.
60  */
61 #define DSHASH_NUM_PARTITIONS_LOG2 7
62 #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
63 
64 /* A magic value used to identify our hash tables. */
65 #define DSHASH_MAGIC 0x75ff6a20
66 
67 /*
68  * Tracking information for each lock partition. Initially, each partition
69  * corresponds to one bucket, but each time the hash table grows, the buckets
70  * covered by each partition split so the number of buckets covered doubles.
71  *
72  * We might want to add padding here so that each partition is on a different
73  * cache line, but doing so would bloat this structure considerably.
74  */
75 typedef struct dshash_partition
76 {
77  LWLock lock; /* Protects all buckets in this partition. */
78  size_t count; /* # of items in this partition's buckets */
80 
81 /*
82  * The head object for a hash table. This will be stored in dynamic shared
83  * memory.
84  */
85 typedef struct dshash_table_control
86 {
91 
92  /*
93  * The following members are written to only when ALL partitions locks are
94  * held. They can be read when any one partition lock is held.
95  */
96 
97  /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
98  size_t size_log2; /* log2(number of buckets) */
99  dsa_pointer buckets; /* current bucket array */
101 
102 /*
103  * Per-backend state for a dynamic hash table.
104  */
106 {
107  dsa_area *area; /* Backing dynamic shared memory area. */
108  dshash_parameters params; /* Parameters. */
109  void *arg; /* User-supplied data pointer. */
110  dshash_table_control *control; /* Control object in DSM. */
111  dsa_pointer *buckets; /* Current bucket pointers in DSM. */
112  size_t size_log2; /* log2(number of buckets) */
113  bool find_locked; /* Is any partition lock held by 'find'? */
114  bool find_exclusively_locked; /* ... exclusively? */
115 };
116 
117 /* Given a pointer to an item, find the entry (user data) it holds. */
118 #define ENTRY_FROM_ITEM(item) \
119  ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
120 
121 /* Given a pointer to an entry, find the item that holds it. */
122 #define ITEM_FROM_ENTRY(entry) \
123  ((dshash_table_item *)((char *)(entry) - \
124  MAXALIGN(sizeof(dshash_table_item))))
125 
126 /* How many resize operations (bucket splits) have there been? */
127 #define NUM_SPLITS(size_log2) \
128  (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
129 
130 /* How many buckets are there in a given size? */
131 #define NUM_BUCKETS(size_log2) \
132  (((size_t) 1) << (size_log2))
133 
134 /* How many buckets are there in each partition at a given size? */
135 #define BUCKETS_PER_PARTITION(size_log2) \
136  (((size_t) 1) << NUM_SPLITS(size_log2))
137 
138 /* Max entries before we need to grow. Half + quarter = 75% load factor. */
139 #define MAX_COUNT_PER_PARTITION(hash_table) \
140  (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
141  BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
142 
143 /* Choose partition based on the highest order bits of the hash. */
144 #define PARTITION_FOR_HASH(hash) \
145  (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
146 
147 /*
148  * Find the bucket index for a given hash and table size. Each time the table
149  * doubles in size, the appropriate bucket for a given hash value doubles and
150  * possibly adds one, depending on the newly revealed bit, so that all buckets
151  * are split.
152  */
153 #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2) \
154  (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
155 
156 /* The index of the first bucket in a given partition. */
157 #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
158  ((partition) << NUM_SPLITS(size_log2))
159 
160 /* Choose partition based on bucket index. */
161 #define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2) \
162  ((bucket_idx) >> NUM_SPLITS(size_log2))
163 
164 /* The head of the active bucket for a given hash value (lvalue). */
165 #define BUCKET_FOR_HASH(hash_table, hash) \
166  (hash_table->buckets[ \
167  BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, \
168  hash_table->size_log2)])
169 
170 static void delete_item(dshash_table *hash_table,
171  dshash_table_item *item);
172 static void resize(dshash_table *hash_table, size_t new_size);
173 static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
174 static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
175  const void *key,
176  dsa_pointer item_pointer);
177 static void insert_item_into_bucket(dshash_table *hash_table,
178  dsa_pointer item_pointer,
179  dshash_table_item *item,
180  dsa_pointer *bucket);
182  const void *key,
183  dsa_pointer *bucket);
184 static bool delete_key_from_bucket(dshash_table *hash_table,
185  const void *key,
186  dsa_pointer *bucket_head);
187 static bool delete_item_from_bucket(dshash_table *hash_table,
188  dshash_table_item *item,
189  dsa_pointer *bucket_head);
190 static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
191 static inline bool equal_keys(dshash_table *hash_table,
192  const void *a, const void *b);
193 
194 #define PARTITION_LOCK(hash_table, i) \
195  (&(hash_table)->control->partitions[(i)].lock)
196 
197 /*
198  * Create a new hash table backed by the given dynamic shared area, with the
199  * given parameters. The returned object is allocated in backend-local memory
200  * using the current MemoryContext. 'arg' will be passed through to the
201  * compare and hash functions.
202  */
203 dshash_table *
204 dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
205 {
206  dshash_table *hash_table;
207  dsa_pointer control;
208 
209  /* Allocate the backend-local object representing the hash table. */
210  hash_table = palloc(sizeof(dshash_table));
211 
212  /* Allocate the control object in shared memory. */
213  control = dsa_allocate(area, sizeof(dshash_table_control));
214 
215  /* Set up the local and shared hash table structs. */
216  hash_table->area = area;
217  hash_table->params = *params;
218  hash_table->arg = arg;
219  hash_table->control = dsa_get_address(area, control);
220  hash_table->control->handle = control;
221  hash_table->control->magic = DSHASH_MAGIC;
222  hash_table->control->lwlock_tranche_id = params->tranche_id;
223 
224  /* Set up the array of lock partitions. */
225  {
227  int tranche_id = hash_table->control->lwlock_tranche_id;
228  int i;
229 
230  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
231  {
232  LWLockInitialize(&partitions[i].lock, tranche_id);
233  partitions[i].count = 0;
234  }
235  }
236 
237  hash_table->find_locked = false;
238  hash_table->find_exclusively_locked = false;
239 
240  /*
241  * Set up the initial array of buckets. Our initial size is the same as
242  * the number of partitions.
243  */
245  hash_table->control->buckets =
249  if (!DsaPointerIsValid(hash_table->control->buckets))
250  {
251  dsa_free(area, control);
252  ereport(ERROR,
253  (errcode(ERRCODE_OUT_OF_MEMORY),
254  errmsg("out of memory"),
255  errdetail("Failed on DSA request of size %zu.",
256  sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
257  }
258  hash_table->buckets = dsa_get_address(area,
259  hash_table->control->buckets);
260  hash_table->size_log2 = hash_table->control->size_log2;
261 
262  return hash_table;
263 }
264 
265 /*
266  * Attach to an existing hash table using a handle. The returned object is
267  * allocated in backend-local memory using the current MemoryContext. 'arg'
268  * will be passed through to the compare and hash functions.
269  */
270 dshash_table *
272  dshash_table_handle handle, void *arg)
273 {
274  dshash_table *hash_table;
275  dsa_pointer control;
276 
277  /* Allocate the backend-local object representing the hash table. */
278  hash_table = palloc(sizeof(dshash_table));
279 
280  /* Find the control object in shared memory. */
281  control = handle;
282 
283  /* Set up the local hash table struct. */
284  hash_table->area = area;
285  hash_table->params = *params;
286  hash_table->arg = arg;
287  hash_table->control = dsa_get_address(area, control);
288  hash_table->find_locked = false;
289  hash_table->find_exclusively_locked = false;
290  Assert(hash_table->control->magic == DSHASH_MAGIC);
291 
292  /*
293  * These will later be set to the correct values by
294  * ensure_valid_bucket_pointers(), at which time we'll be holding a
295  * partition lock for interlocking against concurrent resizing.
296  */
297  hash_table->buckets = NULL;
298  hash_table->size_log2 = 0;
299 
300  return hash_table;
301 }
302 
303 /*
304  * Detach from a hash table. This frees backend-local resources associated
305  * with the hash table, but the hash table will continue to exist until it is
306  * either explicitly destroyed (by a backend that is still attached to it), or
307  * the area that backs it is returned to the operating system.
308  */
309 void
311 {
312  Assert(!hash_table->find_locked);
313 
314  /* The hash table may have been destroyed. Just free local memory. */
315  pfree(hash_table);
316 }
317 
318 /*
319  * Destroy a hash table, returning all memory to the area. The caller must be
320  * certain that no other backend will attempt to access the hash table before
321  * calling this function. Other backend must explicitly call dshash_detach to
322  * free up backend-local memory associated with the hash table. The backend
323  * that calls dshash_destroy must not call dshash_detach.
324  */
325 void
327 {
328  size_t size;
329  size_t i;
330 
331  Assert(hash_table->control->magic == DSHASH_MAGIC);
332  ensure_valid_bucket_pointers(hash_table);
333 
334  /* Free all the entries. */
335  size = NUM_BUCKETS(hash_table->size_log2);
336  for (i = 0; i < size; ++i)
337  {
338  dsa_pointer item_pointer = hash_table->buckets[i];
339 
340  while (DsaPointerIsValid(item_pointer))
341  {
342  dshash_table_item *item;
343  dsa_pointer next_item_pointer;
344 
345  item = dsa_get_address(hash_table->area, item_pointer);
346  next_item_pointer = item->next;
347  dsa_free(hash_table->area, item_pointer);
348  item_pointer = next_item_pointer;
349  }
350  }
351 
352  /*
353  * Vandalize the control block to help catch programming errors where
354  * other backends access the memory formerly occupied by this hash table.
355  */
356  hash_table->control->magic = 0;
357 
358  /* Free the active table and control object. */
359  dsa_free(hash_table->area, hash_table->control->buckets);
360  dsa_free(hash_table->area, hash_table->control->handle);
361 
362  pfree(hash_table);
363 }
364 
365 /*
366  * Get a handle that can be used by other processes to attach to this hash
367  * table.
368  */
371 {
372  Assert(hash_table->control->magic == DSHASH_MAGIC);
373 
374  return hash_table->control->handle;
375 }
376 
377 /*
378  * Look up an entry, given a key. Returns a pointer to an entry if one can be
379  * found with the given key. Returns NULL if the key is not found. If a
380  * non-NULL value is returned, the entry is locked and must be released by
381  * calling dshash_release_lock. If an error is raised before
382  * dshash_release_lock is called, the lock will be released automatically, but
383  * the caller must take care to ensure that the entry is not left corrupted.
384  * The lock mode is either shared or exclusive depending on 'exclusive'.
385  *
386  * The caller must not hold a lock already.
387  *
388  * Note that the lock held is in fact an LWLock, so interrupts will be held on
389  * return from this function, and not resumed until dshash_release_lock is
390  * called. It is a very good idea for the caller to release the lock quickly.
391  */
392 void *
393 dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
394 {
396  size_t partition;
397  dshash_table_item *item;
398 
399  hash = hash_key(hash_table, key);
400  partition = PARTITION_FOR_HASH(hash);
401 
402  Assert(hash_table->control->magic == DSHASH_MAGIC);
403  Assert(!hash_table->find_locked);
404 
405  LWLockAcquire(PARTITION_LOCK(hash_table, partition),
406  exclusive ? LW_EXCLUSIVE : LW_SHARED);
407  ensure_valid_bucket_pointers(hash_table);
408 
409  /* Search the active bucket. */
410  item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
411 
412  if (!item)
413  {
414  /* Not found. */
415  LWLockRelease(PARTITION_LOCK(hash_table, partition));
416  return NULL;
417  }
418  else
419  {
420  /* The caller will free the lock by calling dshash_release_lock. */
421  hash_table->find_locked = true;
422  hash_table->find_exclusively_locked = exclusive;
423  return ENTRY_FROM_ITEM(item);
424  }
425 }
426 
427 /*
428  * Returns a pointer to an exclusively locked item which must be released with
429  * dshash_release_lock. If the key is found in the hash table, 'found' is set
430  * to true and a pointer to the existing entry is returned. If the key is not
431  * found, 'found' is set to false, and a pointer to a newly created entry is
432  * returned.
433  *
434  * Notes above dshash_find() regarding locking and error handling equally
435  * apply here.
436  */
437 void *
439  const void *key,
440  bool *found)
441 {
443  size_t partition_index;
444  dshash_partition *partition;
445  dshash_table_item *item;
446 
447  hash = hash_key(hash_table, key);
448  partition_index = PARTITION_FOR_HASH(hash);
449  partition = &hash_table->control->partitions[partition_index];
450 
451  Assert(hash_table->control->magic == DSHASH_MAGIC);
452  Assert(!hash_table->find_locked);
453 
454 restart:
455  LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
456  LW_EXCLUSIVE);
457  ensure_valid_bucket_pointers(hash_table);
458 
459  /* Search the active bucket. */
460  item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
461 
462  if (item)
463  *found = true;
464  else
465  {
466  *found = false;
467 
468  /* Check if we are getting too full. */
469  if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
470  {
471  /*
472  * The load factor (= keys / buckets) for all buckets protected by
473  * this partition is > 0.75. Presumably the same applies
474  * generally across the whole hash table (though we don't attempt
475  * to track that directly to avoid contention on some kind of
476  * central counter; we just assume that this partition is
477  * representative). This is a good time to resize.
478  *
479  * Give up our existing lock first, because resizing needs to
480  * reacquire all the locks in the right order to avoid deadlocks.
481  */
482  LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
483  resize(hash_table, hash_table->size_log2 + 1);
484 
485  goto restart;
486  }
487 
488  /* Finally we can try to insert the new item. */
489  item = insert_into_bucket(hash_table, key,
490  &BUCKET_FOR_HASH(hash_table, hash));
491  item->hash = hash;
492  /* Adjust per-lock-partition counter for load factor knowledge. */
493  ++partition->count;
494  }
495 
496  /* The caller must release the lock with dshash_release_lock. */
497  hash_table->find_locked = true;
498  hash_table->find_exclusively_locked = true;
499  return ENTRY_FROM_ITEM(item);
500 }
501 
502 /*
503  * Remove an entry by key. Returns true if the key was found and the
504  * corresponding entry was removed.
505  *
506  * To delete an entry that you already have a pointer to, see
507  * dshash_delete_entry.
508  */
509 bool
510 dshash_delete_key(dshash_table *hash_table, const void *key)
511 {
513  size_t partition;
514  bool found;
515 
516  Assert(hash_table->control->magic == DSHASH_MAGIC);
517  Assert(!hash_table->find_locked);
518 
519  hash = hash_key(hash_table, key);
520  partition = PARTITION_FOR_HASH(hash);
521 
522  LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
523  ensure_valid_bucket_pointers(hash_table);
524 
525  if (delete_key_from_bucket(hash_table, key,
526  &BUCKET_FOR_HASH(hash_table, hash)))
527  {
528  Assert(hash_table->control->partitions[partition].count > 0);
529  found = true;
530  --hash_table->control->partitions[partition].count;
531  }
532  else
533  found = false;
534 
535  LWLockRelease(PARTITION_LOCK(hash_table, partition));
536 
537  return found;
538 }
539 
540 /*
541  * Remove an entry. The entry must already be exclusively locked, and must
542  * have been obtained by dshash_find or dshash_find_or_insert. Note that this
543  * function releases the lock just like dshash_release_lock.
544  *
545  * To delete an entry by key, see dshash_delete_key.
546  */
547 void
548 dshash_delete_entry(dshash_table *hash_table, void *entry)
549 {
550  dshash_table_item *item = ITEM_FROM_ENTRY(entry);
551  size_t partition = PARTITION_FOR_HASH(item->hash);
552 
553  Assert(hash_table->control->magic == DSHASH_MAGIC);
554  Assert(hash_table->find_locked);
555  Assert(hash_table->find_exclusively_locked);
556  Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
557  LW_EXCLUSIVE));
558 
559  delete_item(hash_table, item);
560  hash_table->find_locked = false;
561  hash_table->find_exclusively_locked = false;
562  LWLockRelease(PARTITION_LOCK(hash_table, partition));
563 }
564 
565 /*
566  * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
567  */
568 void
569 dshash_release_lock(dshash_table *hash_table, void *entry)
570 {
571  dshash_table_item *item = ITEM_FROM_ENTRY(entry);
572  size_t partition_index = PARTITION_FOR_HASH(item->hash);
573 
574  Assert(hash_table->control->magic == DSHASH_MAGIC);
575  Assert(hash_table->find_locked);
576  Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition_index),
577  hash_table->find_exclusively_locked
578  ? LW_EXCLUSIVE : LW_SHARED));
579 
580  hash_table->find_locked = false;
581  hash_table->find_exclusively_locked = false;
582  LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
583 }
584 
585 /*
586  * A compare function that forwards to memcmp.
587  */
588 int
589 dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
590 {
591  return memcmp(a, b, size);
592 }
593 
594 /*
595  * A hash function that forwards to tag_hash.
596  */
598 dshash_memhash(const void *v, size_t size, void *arg)
599 {
600  return tag_hash(v, size);
601 }
602 
603 /*
604  * Sequentially scan through dshash table and return all the elements one by
605  * one, return NULL when all elements have been returned.
606  *
607  * dshash_seq_term needs to be called when a scan finished. The caller may
608  * delete returned elements midst of a scan by using dshash_delete_current()
609  * if exclusive = true.
610  */
611 void
613  bool exclusive)
614 {
615  status->hash_table = hash_table;
616  status->curbucket = 0;
617  status->nbuckets = 0;
618  status->curitem = NULL;
619  status->pnextitem = InvalidDsaPointer;
620  status->curpartition = -1;
621  status->exclusive = exclusive;
622 }
623 
624 /*
625  * Returns the next element.
626  *
627  * Returned elements are locked and the caller may not release the lock. It is
628  * released by future calls to dshash_seq_next() or dshash_seq_term().
629  */
630 void *
632 {
633  dsa_pointer next_item_pointer;
634 
635  /*
636  * Not yet holding any partition locks. Need to determine the size of the
637  * hash table, it could have been resized since we were looking last.
638  * Since we iterate in partition order, we can start by unconditionally
639  * lock partition 0.
640  *
641  * Once we hold the lock, no resizing can happen until the scan ends. So
642  * we don't need to repeatedly call ensure_valid_bucket_pointers().
643  */
644  if (status->curpartition == -1)
645  {
646  Assert(status->curbucket == 0);
647  Assert(!status->hash_table->find_locked);
648 
649  status->curpartition = 0;
650 
651  LWLockAcquire(PARTITION_LOCK(status->hash_table,
652  status->curpartition),
653  status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
654 
656 
657  status->nbuckets =
658  NUM_BUCKETS(status->hash_table->control->size_log2);
659  next_item_pointer = status->hash_table->buckets[status->curbucket];
660  }
661  else
662  next_item_pointer = status->pnextitem;
663 
665  status->curpartition),
666  status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
667 
668  /* Move to the next bucket if we finished the current bucket */
669  while (!DsaPointerIsValid(next_item_pointer))
670  {
671  int next_partition;
672 
673  if (++status->curbucket >= status->nbuckets)
674  {
675  /* all buckets have been scanned. finish. */
676  return NULL;
677  }
678 
679  /* Check if move to the next partition */
680  next_partition =
682  status->hash_table->size_log2);
683 
684  if (status->curpartition != next_partition)
685  {
686  /*
687  * Move to the next partition. Lock the next partition then
688  * release the current, not in the reverse order to avoid
689  * concurrent resizing. Avoid dead lock by taking lock in the
690  * same order with resize().
691  */
692  LWLockAcquire(PARTITION_LOCK(status->hash_table,
693  next_partition),
694  status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
695  LWLockRelease(PARTITION_LOCK(status->hash_table,
696  status->curpartition));
697  status->curpartition = next_partition;
698  }
699 
700  next_item_pointer = status->hash_table->buckets[status->curbucket];
701  }
702 
703  status->curitem =
704  dsa_get_address(status->hash_table->area, next_item_pointer);
705  status->hash_table->find_locked = true;
706  status->hash_table->find_exclusively_locked = status->exclusive;
707 
708  /*
709  * The caller may delete the item. Store the next item in case of
710  * deletion.
711  */
712  status->pnextitem = status->curitem->next;
713 
714  return ENTRY_FROM_ITEM(status->curitem);
715 }
716 
717 /*
718  * Terminates the seqscan and release all locks.
719  *
720  * Needs to be called after finishing or when exiting a seqscan.
721  */
722 void
724 {
725  status->hash_table->find_locked = false;
726  status->hash_table->find_exclusively_locked = false;
727 
728  if (status->curpartition >= 0)
729  LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
730 }
731 
732 /*
733  * Remove the current entry of the seq scan.
734  */
735 void
737 {
738  dshash_table *hash_table = status->hash_table;
739  dshash_table_item *item = status->curitem;
740  size_t partition PG_USED_FOR_ASSERTS_ONLY;
741 
742  partition = PARTITION_FOR_HASH(item->hash);
743 
744  Assert(status->exclusive);
745  Assert(hash_table->control->magic == DSHASH_MAGIC);
746  Assert(hash_table->find_locked);
747  Assert(hash_table->find_exclusively_locked);
748  Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
749  LW_EXCLUSIVE));
750 
751  delete_item(hash_table, item);
752 }
753 
754 /*
755  * Print debugging information about the internal state of the hash table to
756  * stderr. The caller must hold no partition locks.
757  */
758 void
760 {
761  size_t i;
762  size_t j;
763 
764  Assert(hash_table->control->magic == DSHASH_MAGIC);
765  Assert(!hash_table->find_locked);
766 
767  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
768  {
769  Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
770  LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
771  }
772 
773  ensure_valid_bucket_pointers(hash_table);
774 
775  fprintf(stderr,
776  "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
777  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
778  {
779  dshash_partition *partition = &hash_table->control->partitions[i];
780  size_t begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
781  size_t end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
782 
783  fprintf(stderr, " partition %zu\n", i);
784  fprintf(stderr,
785  " active buckets (key count = %zu)\n", partition->count);
786 
787  for (j = begin; j < end; ++j)
788  {
789  size_t count = 0;
790  dsa_pointer bucket = hash_table->buckets[j];
791 
792  while (DsaPointerIsValid(bucket))
793  {
794  dshash_table_item *item;
795 
796  item = dsa_get_address(hash_table->area, bucket);
797 
798  bucket = item->next;
799  ++count;
800  }
801  fprintf(stderr, " bucket %zu (key count = %zu)\n", j, count);
802  }
803  }
804 
805  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
806  LWLockRelease(PARTITION_LOCK(hash_table, i));
807 }
808 
809 /*
810  * Delete a locked item to which we have a pointer.
811  */
812 static void
814 {
815  size_t hash = item->hash;
816  size_t partition = PARTITION_FOR_HASH(hash);
817 
818  Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
819 
820  if (delete_item_from_bucket(hash_table, item,
821  &BUCKET_FOR_HASH(hash_table, hash)))
822  {
823  Assert(hash_table->control->partitions[partition].count > 0);
824  --hash_table->control->partitions[partition].count;
825  }
826  else
827  {
828  Assert(false);
829  }
830 }
831 
832 /*
833  * Grow the hash table if necessary to the requested number of buckets. The
834  * requested size must be double some previously observed size.
835  *
836  * Must be called without any partition lock held.
837  */
838 static void
839 resize(dshash_table *hash_table, size_t new_size_log2)
840 {
841  dsa_pointer old_buckets;
842  dsa_pointer new_buckets_shared;
843  dsa_pointer *new_buckets;
844  size_t size;
845  size_t new_size = ((size_t) 1) << new_size_log2;
846  size_t i;
847 
848  /*
849  * Acquire the locks for all lock partitions. This is expensive, but we
850  * shouldn't have to do it many times.
851  */
852  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
853  {
854  Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
855 
857  if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
858  {
859  /*
860  * Another backend has already increased the size; we can avoid
861  * obtaining all the locks and return early.
862  */
863  LWLockRelease(PARTITION_LOCK(hash_table, 0));
864  return;
865  }
866  }
867 
868  Assert(new_size_log2 == hash_table->control->size_log2 + 1);
869 
870  /* Allocate the space for the new table. */
871  new_buckets_shared = dsa_allocate0(hash_table->area,
872  sizeof(dsa_pointer) * new_size);
873  new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
874 
875  /*
876  * We've allocated the new bucket array; all that remains to do now is to
877  * reinsert all items, which amounts to adjusting all the pointers.
878  */
879  size = ((size_t) 1) << hash_table->control->size_log2;
880  for (i = 0; i < size; ++i)
881  {
882  dsa_pointer item_pointer = hash_table->buckets[i];
883 
884  while (DsaPointerIsValid(item_pointer))
885  {
886  dshash_table_item *item;
887  dsa_pointer next_item_pointer;
888 
889  item = dsa_get_address(hash_table->area, item_pointer);
890  next_item_pointer = item->next;
891  insert_item_into_bucket(hash_table, item_pointer, item,
892  &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
893  new_size_log2)]);
894  item_pointer = next_item_pointer;
895  }
896  }
897 
898  /* Swap the hash table into place and free the old one. */
899  old_buckets = hash_table->control->buckets;
900  hash_table->control->buckets = new_buckets_shared;
901  hash_table->control->size_log2 = new_size_log2;
902  hash_table->buckets = new_buckets;
903  dsa_free(hash_table->area, old_buckets);
904 
905  /* Release all the locks. */
906  for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
907  LWLockRelease(PARTITION_LOCK(hash_table, i));
908 }
909 
910 /*
911  * Make sure that our backend-local bucket pointers are up to date. The
912  * caller must have locked one lock partition, which prevents resize() from
913  * running concurrently.
914  */
915 static inline void
917 {
918  if (hash_table->size_log2 != hash_table->control->size_log2)
919  {
920  hash_table->buckets = dsa_get_address(hash_table->area,
921  hash_table->control->buckets);
922  hash_table->size_log2 = hash_table->control->size_log2;
923  }
924 }
925 
926 /*
927  * Scan a locked bucket for a match, using the provided compare function.
928  */
929 static inline dshash_table_item *
930 find_in_bucket(dshash_table *hash_table, const void *key,
931  dsa_pointer item_pointer)
932 {
933  while (DsaPointerIsValid(item_pointer))
934  {
935  dshash_table_item *item;
936 
937  item = dsa_get_address(hash_table->area, item_pointer);
938  if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
939  return item;
940  item_pointer = item->next;
941  }
942  return NULL;
943 }
944 
945 /*
946  * Insert an already-allocated item into a bucket.
947  */
948 static void
950  dsa_pointer item_pointer,
951  dshash_table_item *item,
952  dsa_pointer *bucket)
953 {
954  Assert(item == dsa_get_address(hash_table->area, item_pointer));
955 
956  item->next = *bucket;
957  *bucket = item_pointer;
958 }
959 
960 /*
961  * Allocate space for an entry with the given key and insert it into the
962  * provided bucket.
963  */
964 static dshash_table_item *
966  const void *key,
967  dsa_pointer *bucket)
968 {
969  dsa_pointer item_pointer;
970  dshash_table_item *item;
971 
972  item_pointer = dsa_allocate(hash_table->area,
973  hash_table->params.entry_size +
974  MAXALIGN(sizeof(dshash_table_item)));
975  item = dsa_get_address(hash_table->area, item_pointer);
976  memcpy(ENTRY_FROM_ITEM(item), key, hash_table->params.key_size);
977  insert_item_into_bucket(hash_table, item_pointer, item, bucket);
978  return item;
979 }
980 
981 /*
982  * Search a bucket for a matching key and delete it.
983  */
984 static bool
986  const void *key,
987  dsa_pointer *bucket_head)
988 {
989  while (DsaPointerIsValid(*bucket_head))
990  {
991  dshash_table_item *item;
992 
993  item = dsa_get_address(hash_table->area, *bucket_head);
994 
995  if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
996  {
998 
999  next = item->next;
1000  dsa_free(hash_table->area, *bucket_head);
1001  *bucket_head = next;
1002 
1003  return true;
1004  }
1005  bucket_head = &item->next;
1006  }
1007  return false;
1008 }
1009 
1010 /*
1011  * Delete the specified item from the bucket.
1012  */
1013 static bool
1015  dshash_table_item *item,
1016  dsa_pointer *bucket_head)
1017 {
1018  while (DsaPointerIsValid(*bucket_head))
1019  {
1020  dshash_table_item *bucket_item;
1021 
1022  bucket_item = dsa_get_address(hash_table->area, *bucket_head);
1023 
1024  if (bucket_item == item)
1025  {
1026  dsa_pointer next;
1027 
1028  next = item->next;
1029  dsa_free(hash_table->area, *bucket_head);
1030  *bucket_head = next;
1031  return true;
1032  }
1033  bucket_head = &bucket_item->next;
1034  }
1035  return false;
1036 }
1037 
1038 /*
1039  * Compute the hash value for a key.
1040  */
1041 static inline dshash_hash
1042 hash_key(dshash_table *hash_table, const void *key)
1043 {
1044  return hash_table->params.hash_function(key,
1045  hash_table->params.key_size,
1046  hash_table->arg);
1047 }
1048 
1049 /*
1050  * Check whether two keys compare equal.
1051  */
1052 static inline bool
1053 equal_keys(dshash_table *hash_table, const void *a, const void *b)
1054 {
1055  return hash_table->params.compare_function(a, b,
1056  hash_table->params.key_size,
1057  hash_table->arg) == 0;
1058 }
static int32 next
Definition: blutils.c:219
unsigned int uint32
Definition: c.h:441
#define MAXALIGN(LEN)
Definition: c.h:757
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
dsa_pointer dsa_allocate_extended(dsa_area *area, size_t size, int flags)
Definition: dsa.c:665
void * dsa_get_address(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:932
void dsa_free(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:820
#define dsa_allocate0(area, size)
Definition: dsa.h:88
uint64 dsa_pointer
Definition: dsa.h:62
#define dsa_allocate(area, size)
Definition: dsa.h:84
#define InvalidDsaPointer
Definition: dsa.h:78
#define DSA_ALLOC_NO_OOM
Definition: dsa.h:74
#define DsaPointerIsValid(x)
Definition: dsa.h:81
#define DSA_ALLOC_ZERO
Definition: dsa.h:75
bool dshash_delete_key(dshash_table *hash_table, const void *key)
Definition: dshash.c:510
void dshash_delete_entry(dshash_table *hash_table, void *entry)
Definition: dshash.c:548
void dshash_destroy(dshash_table *hash_table)
Definition: dshash.c:326
void dshash_release_lock(dshash_table *hash_table, void *entry)
Definition: dshash.c:569
#define PARTITION_LOCK(hash_table, i)
Definition: dshash.c:194
void dshash_detach(dshash_table *hash_table)
Definition: dshash.c:310
void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table, bool exclusive)
Definition: dshash.c:612
static void resize(dshash_table *hash_table, size_t new_size)
Definition: dshash.c:839
#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)
Definition: dshash.c:153
static bool delete_key_from_bucket(dshash_table *hash_table, const void *key, dsa_pointer *bucket_head)
Definition: dshash.c:985
void * dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
Definition: dshash.c:393
static dshash_hash hash_key(dshash_table *hash_table, const void *key)
Definition: dshash.c:1042
#define ITEM_FROM_ENTRY(entry)
Definition: dshash.c:122
dshash_table_handle dshash_get_hash_table_handle(dshash_table *hash_table)
Definition: dshash.c:370
void dshash_dump(dshash_table *hash_table)
Definition: dshash.c:759
static bool equal_keys(dshash_table *hash_table, const void *a, const void *b)
Definition: dshash.c:1053
void dshash_seq_term(dshash_seq_status *status)
Definition: dshash.c:723
#define DSHASH_MAGIC
Definition: dshash.c:65
static void insert_item_into_bucket(dshash_table *hash_table, dsa_pointer item_pointer, dshash_table_item *item, dsa_pointer *bucket)
Definition: dshash.c:949
static bool delete_item_from_bucket(dshash_table *hash_table, dshash_table_item *item, dsa_pointer *bucket_head)
Definition: dshash.c:1014
#define DSHASH_NUM_PARTITIONS
Definition: dshash.c:62
#define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)
Definition: dshash.c:161
static dshash_table_item * insert_into_bucket(dshash_table *hash_table, const void *key, dsa_pointer *bucket)
Definition: dshash.c:965
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)
Definition: dshash.c:157
#define MAX_COUNT_PER_PARTITION(hash_table)
Definition: dshash.c:139
#define ENTRY_FROM_ITEM(item)
Definition: dshash.c:118
#define DSHASH_NUM_PARTITIONS_LOG2
Definition: dshash.c:61
static void delete_item(dshash_table *hash_table, dshash_table_item *item)
Definition: dshash.c:813
dshash_hash dshash_memhash(const void *v, size_t size, void *arg)
Definition: dshash.c:598
#define NUM_BUCKETS(size_log2)
Definition: dshash.c:131
static void ensure_valid_bucket_pointers(dshash_table *hash_table)
Definition: dshash.c:916
static dshash_table_item * find_in_bucket(dshash_table *hash_table, const void *key, dsa_pointer item_pointer)
Definition: dshash.c:930
void * dshash_find_or_insert(dshash_table *hash_table, const void *key, bool *found)
Definition: dshash.c:438
dshash_table * dshash_attach(dsa_area *area, const dshash_parameters *params, dshash_table_handle handle, void *arg)
Definition: dshash.c:271
int dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
Definition: dshash.c:589
struct dshash_table_control dshash_table_control
dshash_table * dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
Definition: dshash.c:204
#define PARTITION_FOR_HASH(hash)
Definition: dshash.c:144
struct dshash_partition dshash_partition
void * dshash_seq_next(dshash_seq_status *status)
Definition: dshash.c:631
#define BUCKET_FOR_HASH(hash_table, hash)
Definition: dshash.c:165
void dshash_delete_current(dshash_seq_status *status)
Definition: dshash.c:736
dsa_pointer dshash_table_handle
Definition: dshash.h:24
uint32 dshash_hash
Definition: dshash.h:27
int errdetail(const char *fmt,...)
Definition: elog.c:1037
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define ERROR
Definition: elog.h:33
#define ereport(elevel,...)
Definition: elog.h:143
uint32 tag_hash(const void *key, Size keysize)
Definition: hashfn.c:677
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1196
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1800
bool LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
Definition: lwlock.c:1934
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:734
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1916
@ LW_SHARED
Definition: lwlock.h:105
@ LW_EXCLUSIVE
Definition: lwlock.h:104
void pfree(void *pointer)
Definition: mcxt.c:1175
void * palloc(Size size)
Definition: mcxt.c:1068
void * arg
static void static void status(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:229
static int partitions
Definition: pgbench.c:236
#define fprintf
Definition: port.h:229
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
Definition: lwlock.h:32
Definition: dsa.c:355
dshash_hash_function hash_function
Definition: dshash.h:54
size_t key_size
Definition: dshash.h:51
dshash_compare_function compare_function
Definition: dshash.h:53
size_t entry_size
Definition: dshash.h:52
LWLock lock
Definition: dshash.c:77
size_t count
Definition: dshash.c:78
size_t size_log2
Definition: dshash.c:98
dshash_partition partitions[DSHASH_NUM_PARTITIONS]
Definition: dshash.c:89
dshash_table_handle handle
Definition: dshash.c:87
dsa_pointer buckets
Definition: dshash.c:99
dshash_hash hash
Definition: dshash.c:51
dsa_pointer next
Definition: dshash.c:49
bool find_locked
Definition: dshash.c:113
dshash_parameters params
Definition: dshash.c:108
dsa_pointer * buckets
Definition: dshash.c:111
bool find_exclusively_locked
Definition: dshash.c:114
dsa_area * area
Definition: dshash.c:107
void * arg
Definition: dshash.c:109
size_t size_log2
Definition: dshash.c:112
dshash_table_control * control
Definition: dshash.c:110