PostgreSQL Source Code  git master
dynahash.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dynahash.c
4  * dynamic chained hash tables
5  *
6  * dynahash.c supports both local-to-a-backend hash tables and hash tables in
7  * shared memory. For shared hash tables, it is the caller's responsibility
8  * to provide appropriate access interlocking. The simplest convention is
9  * that a single LWLock protects the whole hash table. Searches (HASH_FIND or
10  * hash_seq_search) need only shared lock, but any update requires exclusive
11  * lock. For heavily-used shared tables, the single-lock approach creates a
12  * concurrency bottleneck, so we also support "partitioned" locking wherein
13  * there are multiple LWLocks guarding distinct subsets of the table. To use
14  * a hash table in partitioned mode, the HASH_PARTITION flag must be given
15  * to hash_create. This prevents any attempt to split buckets on-the-fly.
16  * Therefore, each hash bucket chain operates independently, and no fields
17  * of the hash header change after init except nentries and freeList.
18  * (A partitioned table uses multiple copies of those fields, guarded by
19  * spinlocks, for additional concurrency.)
20  * This lets any subset of the hash buckets be treated as a separately
21  * lockable partition. We expect callers to use the low-order bits of a
22  * lookup key's hash value as a partition number --- this will work because
23  * of the way calc_bucket() maps hash values to bucket numbers.
24  *
25  * For hash tables in shared memory, the memory allocator function should
26  * match malloc's semantics of returning NULL on failure. For hash tables
27  * in local memory, we typically use palloc() which will throw error on
28  * failure. The code in this file has to cope with both cases.
29  *
30  * dynahash.c provides support for these types of lookup keys:
31  *
32  * 1. Null-terminated C strings (truncated if necessary to fit in keysize),
33  * compared as though by strcmp(). This is selected by specifying the
34  * HASH_STRINGS flag to hash_create.
35  *
36  * 2. Arbitrary binary data of size keysize, compared as though by memcmp().
37  * (Caller must ensure there are no undefined padding bits in the keys!)
38  * This is selected by specifying the HASH_BLOBS flag to hash_create.
39  *
40  * 3. More complex key behavior can be selected by specifying user-supplied
41  * hashing, comparison, and/or key-copying functions. At least a hashing
42  * function must be supplied; comparison defaults to memcmp() and key copying
43  * to memcpy() when a user-defined hashing function is selected.
44  *
45  * Compared to simplehash, dynahash has the following benefits:
46  *
47  * - It supports partitioning, which is useful for shared memory access using
48  * locks.
49  * - Shared memory hashes are allocated in a fixed size area at startup and
50  * are discoverable by name from other processes.
51  * - Because entries don't need to be moved in the case of hash conflicts,
52  * dynahash has better performance for large entries.
53  * - Guarantees stable pointers to entries.
54  *
55  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
56  * Portions Copyright (c) 1994, Regents of the University of California
57  *
58  *
59  * IDENTIFICATION
60  * src/backend/utils/hash/dynahash.c
61  *
62  *-------------------------------------------------------------------------
63  */
64 
65 /*
66  * Original comments:
67  *
68  * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
69  * Coded into C, with minor code improvements, and with hsearch(3) interface,
70  * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
71  * also, hcreate/hdestroy routines added to simulate hsearch(3).
72  *
73  * These routines simulate hsearch(3) and family, with the important
74  * difference that the hash table is dynamic - can grow indefinitely
75  * beyond its original size (as supplied to hcreate()).
76  *
77  * Performance appears to be comparable to that of hsearch(3).
78  * The 'source-code' options referred to in hsearch(3)'s 'man' page
79  * are not implemented; otherwise functionality is identical.
80  *
81  * Compilation controls:
82  * HASH_DEBUG controls some informative traces, mainly for debugging.
83  * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
84  * when combined with HASH_DEBUG, these are displayed by hdestroy().
85  *
86  * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
87  * concatenation property, in probably unnecessary code 'optimization'.
88  *
89  * Modified margo@postgres.berkeley.edu February 1990
90  * added multiple table interface
91  * Modified by sullivan@postgres.berkeley.edu April 1990
92  * changed ctl structure for shared memory
93  */
94 
95 #include "postgres.h"
96 
97 #include <limits.h>
98 
99 #include "access/xact.h"
100 #include "common/hashfn.h"
101 #include "port/pg_bitutils.h"
102 #include "storage/shmem.h"
103 #include "storage/spin.h"
104 #include "utils/dynahash.h"
105 #include "utils/memutils.h"
106 
107 
108 /*
109  * Constants
110  *
111  * A hash table has a top-level "directory", each of whose entries points
112  * to a "segment" of ssize bucket headers. The maximum number of hash
113  * buckets is thus dsize * ssize (but dsize may be expansible). Of course,
114  * the number of records in the table can be larger, but we don't want a
115  * whole lot of records per bucket or performance goes down.
116  *
117  * In a hash table allocated in shared memory, the directory cannot be
118  * expanded because it must stay at a fixed address. The directory size
119  * should be selected using hash_select_dirsize (and you'd better have
120  * a good idea of the maximum number of entries!). For non-shared hash
121  * tables, the initial directory size can be left at the default.
122  */
123 #define DEF_SEGSIZE 256
124 #define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */
125 #define DEF_DIRSIZE 256
126 
127 /* Number of freelists to be used for a partitioned hash table. */
128 #define NUM_FREELISTS 32
129 
130 /* A hash bucket is a linked list of HASHELEMENTs */
132 
133 /* A hash segment is an array of bucket headers */
135 
136 /*
137  * Per-freelist data.
138  *
139  * In a partitioned hash table, each freelist is associated with a specific
140  * set of hashcodes, as determined by the FREELIST_IDX() macro below.
141  * nentries tracks the number of live hashtable entries having those hashcodes
142  * (NOT the number of entries in the freelist, as you might expect).
143  *
144  * The coverage of a freelist might be more or less than one partition, so it
145  * needs its own lock rather than relying on caller locking. Relying on that
146  * wouldn't work even if the coverage was the same, because of the occasional
147  * need to "borrow" entries from another freelist; see get_hash_entry().
148  *
149  * Using an array of FreeListData instead of separate arrays of mutexes,
150  * nentries and freeLists helps to reduce sharing of cache lines between
151  * different mutexes.
152  */
153 typedef struct
154 {
155  slock_t mutex; /* spinlock for this freelist */
156  long nentries; /* number of entries in associated buckets */
157  HASHELEMENT *freeList; /* chain of free elements */
158 } FreeListData;
159 
160 /*
161  * Header structure for a hash table --- contains all changeable info
162  *
163  * In a shared-memory hash table, the HASHHDR is in shared memory, while
164  * each backend has a local HTAB struct. For a non-shared table, there isn't
165  * any functional difference between HASHHDR and HTAB, but we separate them
166  * anyway to share code between shared and non-shared tables.
167  */
168 struct HASHHDR
169 {
170  /*
171  * The freelist can become a point of contention in high-concurrency hash
172  * tables, so we use an array of freelists, each with its own mutex and
173  * nentries count, instead of just a single one. Although the freelists
174  * normally operate independently, we will scavenge entries from freelists
175  * other than a hashcode's default freelist when necessary.
176  *
177  * If the hash table is not partitioned, only freeList[0] is used and its
178  * spinlock is not used at all; callers' locking is assumed sufficient.
179  */
181 
182  /* These fields can change, but not in a partitioned table */
183  /* Also, dsize can't change in a shared table, even if unpartitioned */
184  long dsize; /* directory size */
185  long nsegs; /* number of allocated segments (<= dsize) */
186  uint32 max_bucket; /* ID of maximum bucket in use */
187  uint32 high_mask; /* mask to modulo into entire table */
188  uint32 low_mask; /* mask to modulo into lower half of table */
189 
190  /* These fields are fixed at hashtable creation */
191  Size keysize; /* hash key length in bytes */
192  Size entrysize; /* total user element size in bytes */
193  long num_partitions; /* # partitions (must be power of 2), or 0 */
194  long max_dsize; /* 'dsize' limit if directory is fixed size */
195  long ssize; /* segment size --- must be power of 2 */
196  int sshift; /* segment shift = log2(ssize) */
197  int nelem_alloc; /* number of entries to allocate at once */
198 
199 #ifdef HASH_STATISTICS
200 
201  /*
202  * Count statistics here. NB: stats code doesn't bother with mutex, so
203  * counts could be corrupted a bit in a partitioned table.
204  */
205  long accesses;
206  long collisions;
207 #endif
208 };
209 
/* A table is partitioned iff it was given a nonzero partition count */
#define IS_PARTITIONED(hctl)   ((hctl)->num_partitions != 0)

/*
 * Freelist serving a given hash code: unpartitioned tables always use
 * freelist 0, partitioned ones spread hash codes over all NUM_FREELISTS.
 */
#define FREELIST_IDX(hctl, hashcode) \
	(!IS_PARTITIONED(hctl) ? 0 : (hashcode) % NUM_FREELISTS)
214 
215 /*
216  * Top control structure for a hashtable --- in a shared table, each backend
217  * has its own copy (OK since no fields change at runtime)
218  */
219 struct HTAB
220 {
221  HASHHDR *hctl; /* => shared control information */
222  HASHSEGMENT *dir; /* directory of segment starts */
223  HashValueFunc hash; /* hash function */
224  HashCompareFunc match; /* key comparison function */
225  HashCopyFunc keycopy; /* key copying function */
226  HashAllocFunc alloc; /* memory allocator */
227  MemoryContext hcxt; /* memory context if default allocator used */
228  char *tabname; /* table name (for error messages) */
229  bool isshared; /* true if table is in shared memory */
230  bool isfixed; /* if true, don't enlarge */
231 
232  /* freezing a shared table isn't allowed, so we can keep state here */
233  bool frozen; /* true = no more inserts allowed */
234 
235  /* We keep local copies of these fixed values to reduce contention */
236  Size keysize; /* hash key length in bytes */
237  long ssize; /* segment size --- must be power of 2 */
238  int sshift; /* segment shift = log2(ssize) */
239 };
240 
/*
 * Address of the key (which is also the start of the user entry) inside a
 * HASHELEMENT: it follows the MAXALIGN'd element header.
 */
#define ELEMENTKEY(helem) \
	((char *) (helem) + MAXALIGN(sizeof(HASHELEMENT)))

/*
 * Inverse of ELEMENTKEY: recover the element pointer from a key pointer.
 */
#define ELEMENT_FROM_KEY(key) \
	((HASHELEMENT *) ((char *) (key) - MAXALIGN(sizeof(HASHELEMENT))))

/*
 * Fast MOD arithmetic; only correct when y is a power of 2!
 */
#define MOD(x,y)			   ((x) & ((y) - 1))
256 
257 #ifdef HASH_STATISTICS
258 static long hash_accesses,
259  hash_collisions,
260  hash_expansions;
261 #endif
262 
263 /*
264  * Private function prototypes
265  */
266 static void *DynaHashAlloc(Size size);
267 static HASHSEGMENT seg_alloc(HTAB *hashp);
268 static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
269 static bool dir_realloc(HTAB *hashp);
270 static bool expand_table(HTAB *hashp);
271 static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
272 static void hdefault(HTAB *hashp);
273 static int choose_nelem_alloc(Size entrysize);
274 static bool init_htab(HTAB *hashp, long nelem);
275 static void hash_corrupted(HTAB *hashp) pg_attribute_noreturn();
276 static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue,
277  HASHBUCKET **bucketptr);
278 static long next_pow2_long(long num);
279 static int next_pow2_int(long num);
280 static void register_seq_scan(HTAB *hashp);
281 static void deregister_seq_scan(HTAB *hashp);
282 static bool has_seq_scans(HTAB *hashp);
283 
284 
285 /*
286  * memory allocation support
287  */
289 
290 static void *
292 {
296 }
297 
298 
299 /*
300  * HashCompareFunc for string keys
301  *
302  * Because we copy keys with strlcpy(), they will be truncated at keysize-1
303  * bytes, so we can only compare that many ... hence strncmp is almost but
304  * not quite the right thing.
305  */
306 static int
307 string_compare(const char *key1, const char *key2, Size keysize)
308 {
309  return strncmp(key1, key2, keysize - 1);
310 }
311 
312 
313 /************************** CREATE ROUTINES **********************/
314 
315 /*
316  * hash_create -- create a new dynamic hash table
317  *
318  * tabname: a name for the table (for debugging purposes)
319  * nelem: maximum number of elements expected
320  * *info: additional table parameters, as indicated by flags
321  * flags: bitmask indicating which parameters to take from *info
322  *
323  * The flags value *must* include HASH_ELEM. (Formerly, this was nominally
324  * optional, but the default keysize and entrysize values were useless.)
325  * The flags value must also include exactly one of HASH_STRINGS, HASH_BLOBS,
326  * or HASH_FUNCTION, to define the key hashing semantics (C strings,
327  * binary blobs, or custom, respectively). Callers specifying a custom
328  * hash function will likely also want to use HASH_COMPARE, and perhaps
329  * also HASH_KEYCOPY, to control key comparison and copying.
330  * Another often-used flag is HASH_CONTEXT, to allocate the hash table
331  * under info->hcxt rather than under TopMemoryContext; the default
332  * behavior is only suitable for session-lifespan hash tables.
333  * Other flags bits are special-purpose and seldom used, except for those
334  * associated with shared-memory hash tables, for which see ShmemInitHash().
335  *
336  * Fields in *info are read only when the associated flags bit is set.
337  * It is not necessary to initialize other fields of *info.
338  * Neither tabname nor *info need persist after the hash_create() call.
339  *
340  * Note: It is deprecated for callers of hash_create() to explicitly specify
341  * string_hash, tag_hash, uint32_hash, or oid_hash. Just set HASH_STRINGS or
342  * HASH_BLOBS. Use HASH_FUNCTION only when you want something other than
343  * one of these.
344  *
345  * Note: for a shared-memory hashtable, nelem needs to be a pretty good
346  * estimate, since we can't expand the table on the fly. But an unshared
347  * hashtable can be expanded on-the-fly, so it's better for nelem to be
348  * on the small side and let the table grow if it's exceeded. An overly
349  * large nelem will penalize hash_seq_search speed without buying much.
350  */
351 HTAB *
352 hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
353 {
354  HTAB *hashp;
355  HASHHDR *hctl;
356 
357  /*
358  * Hash tables now allocate space for key and data, but you have to say
359  * how much space to allocate.
360  */
361  Assert(flags & HASH_ELEM);
362  Assert(info->keysize > 0);
363  Assert(info->entrysize >= info->keysize);
364 
365  /*
366  * For shared hash tables, we have a local hash header (HTAB struct) that
367  * we allocate in TopMemoryContext; all else is in shared memory.
368  *
369  * For non-shared hash tables, everything including the hash header is in
370  * a memory context created specially for the hash table --- this makes
371  * hash_destroy very simple. The memory context is made a child of either
372  * a context specified by the caller, or TopMemoryContext if nothing is
373  * specified.
374  */
375  if (flags & HASH_SHARED_MEM)
376  {
377  /* Set up to allocate the hash header */
379  }
380  else
381  {
382  /* Create the hash table's private memory context */
383  if (flags & HASH_CONTEXT)
384  CurrentDynaHashCxt = info->hcxt;
385  else
388  "dynahash",
390  }
391 
392  /* Initialize the hash header, plus a copy of the table name */
393  hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1);
394  MemSet(hashp, 0, sizeof(HTAB));
395 
396  hashp->tabname = (char *) (hashp + 1);
397  strcpy(hashp->tabname, tabname);
398 
399  /* If we have a private context, label it with hashtable's name */
400  if (!(flags & HASH_SHARED_MEM))
402 
403  /*
404  * Select the appropriate hash function (see comments at head of file).
405  */
406  if (flags & HASH_FUNCTION)
407  {
408  Assert(!(flags & (HASH_BLOBS | HASH_STRINGS)));
409  hashp->hash = info->hash;
410  }
411  else if (flags & HASH_BLOBS)
412  {
413  Assert(!(flags & HASH_STRINGS));
414  /* We can optimize hashing for common key sizes */
415  if (info->keysize == sizeof(uint32))
416  hashp->hash = uint32_hash;
417  else
418  hashp->hash = tag_hash;
419  }
420  else
421  {
422  /*
423  * string_hash used to be considered the default hash method, and in a
424  * non-assert build it effectively still is. But we now consider it
425  * an assertion error to not say HASH_STRINGS explicitly. To help
426  * catch mistaken usage of HASH_STRINGS, we also insist on a
427  * reasonably long string length: if the keysize is only 4 or 8 bytes,
428  * it's almost certainly an integer or pointer not a string.
429  */
430  Assert(flags & HASH_STRINGS);
431  Assert(info->keysize > 8);
432 
433  hashp->hash = string_hash;
434  }
435 
436  /*
437  * If you don't specify a match function, it defaults to string_compare if
438  * you used string_hash, and to memcmp otherwise.
439  *
440  * Note: explicitly specifying string_hash is deprecated, because this
441  * might not work for callers in loadable modules on some platforms due to
442  * referencing a trampoline instead of the string_hash function proper.
443  * Specify HASH_STRINGS instead.
444  */
445  if (flags & HASH_COMPARE)
446  hashp->match = info->match;
447  else if (hashp->hash == string_hash)
449  else
450  hashp->match = memcmp;
451 
452  /*
453  * Similarly, the key-copying function defaults to strlcpy or memcpy.
454  */
455  if (flags & HASH_KEYCOPY)
456  hashp->keycopy = info->keycopy;
457  else if (hashp->hash == string_hash)
458  {
459  /*
460  * The signature of keycopy is meant for memcpy(), which returns
461  * void*, but strlcpy() returns size_t. Since we never use the return
462  * value of keycopy, and size_t is pretty much always the same size as
463  * void *, this should be safe. The extra cast in the middle is to
464  * avoid warnings from -Wcast-function-type.
465  */
467  }
468  else
469  hashp->keycopy = memcpy;
470 
471  /* And select the entry allocation function, too. */
472  if (flags & HASH_ALLOC)
473  hashp->alloc = info->alloc;
474  else
475  hashp->alloc = DynaHashAlloc;
476 
477  if (flags & HASH_SHARED_MEM)
478  {
479  /*
480  * ctl structure and directory are preallocated for shared memory
481  * tables. Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
482  * well.
483  */
484  hashp->hctl = info->hctl;
485  hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
486  hashp->hcxt = NULL;
487  hashp->isshared = true;
488 
489  /* hash table already exists, we're just attaching to it */
490  if (flags & HASH_ATTACH)
491  {
492  /* make local copies of some heavily-used values */
493  hctl = hashp->hctl;
494  hashp->keysize = hctl->keysize;
495  hashp->ssize = hctl->ssize;
496  hashp->sshift = hctl->sshift;
497 
498  return hashp;
499  }
500  }
501  else
502  {
503  /* setup hash table defaults */
504  hashp->hctl = NULL;
505  hashp->dir = NULL;
506  hashp->hcxt = CurrentDynaHashCxt;
507  hashp->isshared = false;
508  }
509 
510  if (!hashp->hctl)
511  {
512  hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
513  if (!hashp->hctl)
514  ereport(ERROR,
515  (errcode(ERRCODE_OUT_OF_MEMORY),
516  errmsg("out of memory")));
517  }
518 
519  hashp->frozen = false;
520 
521  hdefault(hashp);
522 
523  hctl = hashp->hctl;
524 
525  if (flags & HASH_PARTITION)
526  {
527  /* Doesn't make sense to partition a local hash table */
528  Assert(flags & HASH_SHARED_MEM);
529 
530  /*
531  * The number of partitions had better be a power of 2. Also, it must
532  * be less than INT_MAX (see init_htab()), so call the int version of
533  * next_pow2.
534  */
536 
537  hctl->num_partitions = info->num_partitions;
538  }
539 
540  if (flags & HASH_SEGMENT)
541  {
542  hctl->ssize = info->ssize;
543  hctl->sshift = my_log2(info->ssize);
544  /* ssize had better be a power of 2 */
545  Assert(hctl->ssize == (1L << hctl->sshift));
546  }
547 
548  /*
549  * SHM hash tables have fixed directory size passed by the caller.
550  */
551  if (flags & HASH_DIRSIZE)
552  {
553  hctl->max_dsize = info->max_dsize;
554  hctl->dsize = info->dsize;
555  }
556 
557  /* remember the entry sizes, too */
558  hctl->keysize = info->keysize;
559  hctl->entrysize = info->entrysize;
560 
561  /* make local copies of heavily-used constant fields */
562  hashp->keysize = hctl->keysize;
563  hashp->ssize = hctl->ssize;
564  hashp->sshift = hctl->sshift;
565 
566  /* Build the hash directory structure */
567  if (!init_htab(hashp, nelem))
568  elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);
569 
570  /*
571  * For a shared hash table, preallocate the requested number of elements.
572  * This reduces problems with run-time out-of-shared-memory conditions.
573  *
574  * For a non-shared hash table, preallocate the requested number of
575  * elements if it's less than our chosen nelem_alloc. This avoids wasting
576  * space if the caller correctly estimates a small table size.
577  */
578  if ((flags & HASH_SHARED_MEM) ||
579  nelem < hctl->nelem_alloc)
580  {
581  int i,
582  freelist_partitions,
583  nelem_alloc,
584  nelem_alloc_first;
585 
586  /*
587  * If hash table is partitioned, give each freelist an equal share of
588  * the initial allocation. Otherwise only freeList[0] is used.
589  */
590  if (IS_PARTITIONED(hashp->hctl))
591  freelist_partitions = NUM_FREELISTS;
592  else
593  freelist_partitions = 1;
594 
595  nelem_alloc = nelem / freelist_partitions;
596  if (nelem_alloc <= 0)
597  nelem_alloc = 1;
598 
599  /*
600  * Make sure we'll allocate all the requested elements; freeList[0]
601  * gets the excess if the request isn't divisible by NUM_FREELISTS.
602  */
603  if (nelem_alloc * freelist_partitions < nelem)
604  nelem_alloc_first =
605  nelem - nelem_alloc * (freelist_partitions - 1);
606  else
607  nelem_alloc_first = nelem_alloc;
608 
609  for (i = 0; i < freelist_partitions; i++)
610  {
611  int temp = (i == 0) ? nelem_alloc_first : nelem_alloc;
612 
613  if (!element_alloc(hashp, temp, i))
614  ereport(ERROR,
615  (errcode(ERRCODE_OUT_OF_MEMORY),
616  errmsg("out of memory")));
617  }
618  }
619 
620  if (flags & HASH_FIXED_SIZE)
621  hashp->isfixed = true;
622  return hashp;
623 }
624 
625 /*
626  * Set default HASHHDR parameters.
627  */
628 static void
629 hdefault(HTAB *hashp)
630 {
631  HASHHDR *hctl = hashp->hctl;
632 
633  MemSet(hctl, 0, sizeof(HASHHDR));
634 
635  hctl->dsize = DEF_DIRSIZE;
636  hctl->nsegs = 0;
637 
638  hctl->num_partitions = 0; /* not partitioned */
639 
640  /* table has no fixed maximum size */
641  hctl->max_dsize = NO_MAX_DSIZE;
642 
643  hctl->ssize = DEF_SEGSIZE;
644  hctl->sshift = DEF_SEGSIZE_SHIFT;
645 
646 #ifdef HASH_STATISTICS
647  hctl->accesses = hctl->collisions = 0;
648 #endif
649 }
650 
651 /*
652  * Given the user-specified entry size, choose nelem_alloc, ie, how many
653  * elements to add to the hash table when we need more.
654  */
655 static int
657 {
658  int nelem_alloc;
659  Size elementSize;
660  Size allocSize;
661 
662  /* Each element has a HASHELEMENT header plus user data. */
663  /* NB: this had better match element_alloc() */
664  elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
665 
666  /*
667  * The idea here is to choose nelem_alloc at least 32, but round up so
668  * that the allocation request will be a power of 2 or just less. This
669  * makes little difference for hash tables in shared memory, but for hash
670  * tables managed by palloc, the allocation request will be rounded up to
671  * a power of 2 anyway. If we fail to take this into account, we'll waste
672  * as much as half the allocated space.
673  */
674  allocSize = 32 * 4; /* assume elementSize at least 8 */
675  do
676  {
677  allocSize <<= 1;
678  nelem_alloc = allocSize / elementSize;
679  } while (nelem_alloc < 32);
680 
681  return nelem_alloc;
682 }
683 
684 /*
685  * Compute derived fields of hctl and build the initial directory/segment
686  * arrays
687  */
688 static bool
689 init_htab(HTAB *hashp, long nelem)
690 {
691  HASHHDR *hctl = hashp->hctl;
692  HASHSEGMENT *segp;
693  int nbuckets;
694  int nsegs;
695  int i;
696 
697  /*
698  * initialize mutexes if it's a partitioned table
699  */
700  if (IS_PARTITIONED(hctl))
701  for (i = 0; i < NUM_FREELISTS; i++)
702  SpinLockInit(&(hctl->freeList[i].mutex));
703 
704  /*
705  * Allocate space for the next greater power of two number of buckets,
706  * assuming a desired maximum load factor of 1.
707  */
708  nbuckets = next_pow2_int(nelem);
709 
710  /*
711  * In a partitioned table, nbuckets must be at least equal to
712  * num_partitions; were it less, keys with apparently different partition
713  * numbers would map to the same bucket, breaking partition independence.
714  * (Normally nbuckets will be much bigger; this is just a safety check.)
715  */
716  while (nbuckets < hctl->num_partitions)
717  nbuckets <<= 1;
718 
719  hctl->max_bucket = hctl->low_mask = nbuckets - 1;
720  hctl->high_mask = (nbuckets << 1) - 1;
721 
722  /*
723  * Figure number of directory segments needed, round up to a power of 2
724  */
725  nsegs = (nbuckets - 1) / hctl->ssize + 1;
726  nsegs = next_pow2_int(nsegs);
727 
728  /*
729  * Make sure directory is big enough. If pre-allocated directory is too
730  * small, choke (caller screwed up).
731  */
732  if (nsegs > hctl->dsize)
733  {
734  if (!(hashp->dir))
735  hctl->dsize = nsegs;
736  else
737  return false;
738  }
739 
740  /* Allocate a directory */
741  if (!(hashp->dir))
742  {
743  CurrentDynaHashCxt = hashp->hcxt;
744  hashp->dir = (HASHSEGMENT *)
745  hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
746  if (!hashp->dir)
747  return false;
748  }
749 
750  /* Allocate initial segments */
751  for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
752  {
753  *segp = seg_alloc(hashp);
754  if (*segp == NULL)
755  return false;
756  }
757 
758  /* Choose number of entries to allocate at a time */
760 
761 #ifdef HASH_DEBUG
762  fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n",
763  "TABLE POINTER ", hashp,
764  "DIRECTORY SIZE ", hctl->dsize,
765  "SEGMENT SIZE ", hctl->ssize,
766  "SEGMENT SHIFT ", hctl->sshift,
767  "MAX BUCKET ", hctl->max_bucket,
768  "HIGH MASK ", hctl->high_mask,
769  "LOW MASK ", hctl->low_mask,
770  "NSEGS ", hctl->nsegs);
771 #endif
772  return true;
773 }
774 
775 /*
776  * Estimate the space needed for a hashtable containing the given number
777  * of entries of given size.
778  * NOTE: this is used to estimate the footprint of hashtables in shared
779  * memory; therefore it does not count HTAB which is in local memory.
780  * NB: assumes that all hash structure parameters have default values!
781  */
782 Size
783 hash_estimate_size(long num_entries, Size entrysize)
784 {
785  Size size;
786  long nBuckets,
787  nSegments,
788  nDirEntries,
789  nElementAllocs,
790  elementSize,
791  elementAllocCnt;
792 
793  /* estimate number of buckets wanted */
794  nBuckets = next_pow2_long(num_entries);
795  /* # of segments needed for nBuckets */
796  nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
797  /* directory entries */
798  nDirEntries = DEF_DIRSIZE;
799  while (nDirEntries < nSegments)
800  nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
801 
802  /* fixed control info */
803  size = MAXALIGN(sizeof(HASHHDR)); /* but not HTAB, per above */
804  /* directory */
805  size = add_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
806  /* segments */
807  size = add_size(size, mul_size(nSegments,
808  MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
809  /* elements --- allocated in groups of choose_nelem_alloc() entries */
810  elementAllocCnt = choose_nelem_alloc(entrysize);
811  nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
812  elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
813  size = add_size(size,
814  mul_size(nElementAllocs,
815  mul_size(elementAllocCnt, elementSize)));
816 
817  return size;
818 }
819 
820 /*
821  * Select an appropriate directory size for a hashtable with the given
822  * maximum number of entries.
823  * This is only needed for hashtables in shared memory, whose directories
824  * cannot be expanded dynamically.
825  * NB: assumes that all hash structure parameters have default values!
826  *
827  * XXX this had better agree with the behavior of init_htab()...
828  */
829 long
830 hash_select_dirsize(long num_entries)
831 {
832  long nBuckets,
833  nSegments,
834  nDirEntries;
835 
836  /* estimate number of buckets wanted */
837  nBuckets = next_pow2_long(num_entries);
838  /* # of segments needed for nBuckets */
839  nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
840  /* directory entries */
841  nDirEntries = DEF_DIRSIZE;
842  while (nDirEntries < nSegments)
843  nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
844 
845  return nDirEntries;
846 }
847 
848 /*
849  * Compute the required initial memory allocation for a shared-memory
850  * hashtable with the given parameters. We need space for the HASHHDR
851  * and for the (non expansible) directory.
852  */
853 Size
854 hash_get_shared_size(HASHCTL *info, int flags)
855 {
856  Assert(flags & HASH_DIRSIZE);
857  Assert(info->dsize == info->max_dsize);
858  return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
859 }
860 
861 
862 /********************** DESTROY ROUTINES ************************/
863 
864 void
866 {
867  if (hashp != NULL)
868  {
869  /* allocation method must be one we know how to free, too */
870  Assert(hashp->alloc == DynaHashAlloc);
871  /* so this hashtable must have its own context */
872  Assert(hashp->hcxt != NULL);
873 
874  hash_stats("destroy", hashp);
875 
876  /*
877  * Free everything by destroying the hash table's memory context.
878  */
879  MemoryContextDelete(hashp->hcxt);
880  }
881 }
882 
883 void
884 hash_stats(const char *where, HTAB *hashp)
885 {
886 #ifdef HASH_STATISTICS
887  fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
888  where, hashp->hctl->accesses, hashp->hctl->collisions);
889 
890  fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
891  hash_get_num_entries(hashp), (long) hashp->hctl->keysize,
892  hashp->hctl->max_bucket, hashp->hctl->nsegs);
893  fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
894  where, hash_accesses, hash_collisions);
895  fprintf(stderr, "hash_stats: total expansions %ld\n",
896  hash_expansions);
897 #endif
898 }
899 
900 /*******************************SEARCH ROUTINES *****************************/
901 
902 
903 /*
904  * get_hash_value -- exported routine to calculate a key's hash value
905  *
906  * We export this because for partitioned tables, callers need to compute
907  * the partition number (from the low-order bits of the hash value) before
908  * searching.
909  */
910 uint32
911 get_hash_value(HTAB *hashp, const void *keyPtr)
912 {
913  return hashp->hash(keyPtr, hashp->keysize);
914 }
915 
916 /* Convert a hash value to a bucket number */
917 static inline uint32
918 calc_bucket(HASHHDR *hctl, uint32 hash_val)
919 {
920  uint32 bucket;
921 
922  bucket = hash_val & hctl->high_mask;
923  if (bucket > hctl->max_bucket)
924  bucket = bucket & hctl->low_mask;
925 
926  return bucket;
927 }
928 
929 /*
930  * hash_search -- look up key in table and perform action
931  * hash_search_with_hash_value -- same, with key's hash value already computed
932  *
933  * action is one of:
934  * HASH_FIND: look up key in table
935  * HASH_ENTER: look up key in table, creating entry if not present
936  * HASH_ENTER_NULL: same, but return NULL if out of memory
937  * HASH_REMOVE: look up key in table, remove entry if present
938  *
939  * Return value is a pointer to the element found/entered/removed if any,
940  * or NULL if no match was found. (NB: in the case of the REMOVE action,
941  * the result is a dangling pointer that shouldn't be dereferenced!)
942  *
943  * HASH_ENTER will normally ereport a generic "out of memory" error if
944  * it is unable to create a new entry. The HASH_ENTER_NULL operation is
945  * the same except it will return NULL if out of memory.
946  *
947  * If foundPtr isn't NULL, then *foundPtr is set true if we found an
948  * existing entry in the table, false otherwise. This is needed in the
949  * HASH_ENTER case, but is redundant with the return value otherwise.
950  *
951  * For hash_search_with_hash_value, the hashvalue parameter must have been
952  * calculated with get_hash_value().
953  */
954 void *
956  const void *keyPtr,
958  bool *foundPtr)
959 {
960  return hash_search_with_hash_value(hashp,
961  keyPtr,
962  hashp->hash(keyPtr, hashp->keysize),
963  action,
964  foundPtr);
965 }
966 
967 void *
969  const void *keyPtr,
970  uint32 hashvalue,
972  bool *foundPtr)
973 {
974  HASHHDR *hctl = hashp->hctl;
975  int freelist_idx = FREELIST_IDX(hctl, hashvalue);
976  Size keysize;
977  HASHBUCKET currBucket;
978  HASHBUCKET *prevBucketPtr;
979  HashCompareFunc match;
980 
981 #ifdef HASH_STATISTICS
982  hash_accesses++;
983  hctl->accesses++;
984 #endif
985 
986  /*
987  * If inserting, check if it is time to split a bucket.
988  *
989  * NOTE: failure to expand table is not a fatal error, it just means we
990  * have to run at higher fill factor than we wanted. However, if we're
991  * using the palloc allocator then it will throw error anyway on
992  * out-of-memory, so we must do this before modifying the table.
993  */
995  {
996  /*
997  * Can't split if running in partitioned mode, nor if frozen, nor if
998  * table is the subject of any active hash_seq_search scans.
999  */
1000  if (hctl->freeList[0].nentries > (long) hctl->max_bucket &&
1001  !IS_PARTITIONED(hctl) && !hashp->frozen &&
1002  !has_seq_scans(hashp))
1003  (void) expand_table(hashp);
1004  }
1005 
1006  /*
1007  * Do the initial lookup
1008  */
1009  (void) hash_initial_lookup(hashp, hashvalue, &prevBucketPtr);
1010  currBucket = *prevBucketPtr;
1011 
1012  /*
1013  * Follow collision chain looking for matching key
1014  */
1015  match = hashp->match; /* save one fetch in inner loop */
1016  keysize = hashp->keysize; /* ditto */
1017 
1018  while (currBucket != NULL)
1019  {
1020  if (currBucket->hashvalue == hashvalue &&
1021  match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
1022  break;
1023  prevBucketPtr = &(currBucket->link);
1024  currBucket = *prevBucketPtr;
1025 #ifdef HASH_STATISTICS
1026  hash_collisions++;
1027  hctl->collisions++;
1028 #endif
1029  }
1030 
1031  if (foundPtr)
1032  *foundPtr = (bool) (currBucket != NULL);
1033 
1034  /*
1035  * OK, now what?
1036  */
1037  switch (action)
1038  {
1039  case HASH_FIND:
1040  if (currBucket != NULL)
1041  return (void *) ELEMENTKEY(currBucket);
1042  return NULL;
1043 
1044  case HASH_REMOVE:
1045  if (currBucket != NULL)
1046  {
1047  /* if partitioned, must lock to touch nentries and freeList */
1048  if (IS_PARTITIONED(hctl))
1049  SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));
1050 
1051  /* delete the record from the appropriate nentries counter. */
1052  Assert(hctl->freeList[freelist_idx].nentries > 0);
1053  hctl->freeList[freelist_idx].nentries--;
1054 
1055  /* remove record from hash bucket's chain. */
1056  *prevBucketPtr = currBucket->link;
1057 
1058  /* add the record to the appropriate freelist. */
1059  currBucket->link = hctl->freeList[freelist_idx].freeList;
1060  hctl->freeList[freelist_idx].freeList = currBucket;
1061 
1062  if (IS_PARTITIONED(hctl))
1063  SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1064 
1065  /*
1066  * better hope the caller is synchronizing access to this
1067  * element, because someone else is going to reuse it the next
1068  * time something is added to the table
1069  */
1070  return (void *) ELEMENTKEY(currBucket);
1071  }
1072  return NULL;
1073 
1074  case HASH_ENTER:
1075  case HASH_ENTER_NULL:
1076  /* Return existing element if found, else create one */
1077  if (currBucket != NULL)
1078  return (void *) ELEMENTKEY(currBucket);
1079 
1080  /* disallow inserts if frozen */
1081  if (hashp->frozen)
1082  elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
1083  hashp->tabname);
1084 
1085  currBucket = get_hash_entry(hashp, freelist_idx);
1086  if (currBucket == NULL)
1087  {
1088  /* out of memory */
1089  if (action == HASH_ENTER_NULL)
1090  return NULL;
1091  /* report a generic message */
1092  if (hashp->isshared)
1093  ereport(ERROR,
1094  (errcode(ERRCODE_OUT_OF_MEMORY),
1095  errmsg("out of shared memory")));
1096  else
1097  ereport(ERROR,
1098  (errcode(ERRCODE_OUT_OF_MEMORY),
1099  errmsg("out of memory")));
1100  }
1101 
1102  /* link into hashbucket chain */
1103  *prevBucketPtr = currBucket;
1104  currBucket->link = NULL;
1105 
1106  /* copy key into record */
1107  currBucket->hashvalue = hashvalue;
1108  hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);
1109 
1110  /*
1111  * Caller is expected to fill the data field on return. DO NOT
1112  * insert any code that could possibly throw error here, as doing
1113  * so would leave the table entry incomplete and hence corrupt the
1114  * caller's data structure.
1115  */
1116 
1117  return (void *) ELEMENTKEY(currBucket);
1118  }
1119 
1120  elog(ERROR, "unrecognized hash action code: %d", (int) action);
1121 
1122  return NULL; /* keep compiler quiet */
1123 }
1124 
1125 /*
1126  * hash_update_hash_key -- change the hash key of an existing table entry
1127  *
1128  * This is equivalent to removing the entry, making a new entry, and copying
1129  * over its data, except that the entry never goes to the table's freelist.
1130  * Therefore this cannot suffer an out-of-memory failure, even if there are
1131  * other processes operating in other partitions of the hashtable.
1132  *
1133  * Returns true if successful, false if the requested new hash key is already
1134  * present. Throws error if the specified entry pointer isn't actually a
1135  * table member.
1136  *
1137  * NB: currently, there is no special case for old and new hash keys being
1138  * identical, which means we'll report false for that situation. This is
1139  * preferable for existing uses.
1140  *
1141  * NB: for a partitioned hashtable, caller must hold lock on both relevant
1142  * partitions, if the new hash key would belong to a different partition.
1143  */
1144 bool
1146  void *existingEntry,
1147  const void *newKeyPtr)
1148 {
1149  HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
1150  uint32 newhashvalue;
1151  Size keysize;
1152  uint32 bucket;
1153  uint32 newbucket;
1154  HASHBUCKET currBucket;
1155  HASHBUCKET *prevBucketPtr;
1156  HASHBUCKET *oldPrevPtr;
1157  HashCompareFunc match;
1158 
1159 #ifdef HASH_STATISTICS
1160  hash_accesses++;
1161  hctl->accesses++;
1162 #endif
1163 
1164  /* disallow updates if frozen */
1165  if (hashp->frozen)
1166  elog(ERROR, "cannot update in frozen hashtable \"%s\"",
1167  hashp->tabname);
1168 
1169  /*
1170  * Lookup the existing element using its saved hash value. We need to do
1171  * this to be able to unlink it from its hash chain, but as a side benefit
1172  * we can verify the validity of the passed existingEntry pointer.
1173  */
1174  bucket = hash_initial_lookup(hashp, existingElement->hashvalue,
1175  &prevBucketPtr);
1176  currBucket = *prevBucketPtr;
1177 
1178  while (currBucket != NULL)
1179  {
1180  if (currBucket == existingElement)
1181  break;
1182  prevBucketPtr = &(currBucket->link);
1183  currBucket = *prevBucketPtr;
1184  }
1185 
1186  if (currBucket == NULL)
1187  elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
1188  hashp->tabname);
1189 
1190  oldPrevPtr = prevBucketPtr;
1191 
1192  /*
1193  * Now perform the equivalent of a HASH_ENTER operation to locate the hash
1194  * chain we want to put the entry into.
1195  */
1196  newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
1197  newbucket = hash_initial_lookup(hashp, newhashvalue, &prevBucketPtr);
1198  currBucket = *prevBucketPtr;
1199 
1200  /*
1201  * Follow collision chain looking for matching key
1202  */
1203  match = hashp->match; /* save one fetch in inner loop */
1204  keysize = hashp->keysize; /* ditto */
1205 
1206  while (currBucket != NULL)
1207  {
1208  if (currBucket->hashvalue == newhashvalue &&
1209  match(ELEMENTKEY(currBucket), newKeyPtr, keysize) == 0)
1210  break;
1211  prevBucketPtr = &(currBucket->link);
1212  currBucket = *prevBucketPtr;
1213 #ifdef HASH_STATISTICS
1214  hash_collisions++;
1215  hctl->collisions++;
1216 #endif
1217  }
1218 
1219  if (currBucket != NULL)
1220  return false; /* collision with an existing entry */
1221 
1222  currBucket = existingElement;
1223 
1224  /*
1225  * If old and new hash values belong to the same bucket, we need not
1226  * change any chain links, and indeed should not since this simplistic
1227  * update will corrupt the list if currBucket is the last element. (We
1228  * cannot fall out earlier, however, since we need to scan the bucket to
1229  * check for duplicate keys.)
1230  */
1231  if (bucket != newbucket)
1232  {
1233  /* OK to remove record from old hash bucket's chain. */
1234  *oldPrevPtr = currBucket->link;
1235 
1236  /* link into new hashbucket chain */
1237  *prevBucketPtr = currBucket;
1238  currBucket->link = NULL;
1239  }
1240 
1241  /* copy new key into record */
1242  currBucket->hashvalue = newhashvalue;
1243  hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);
1244 
1245  /* rest of record is untouched */
1246 
1247  return true;
1248 }
1249 
1250 /*
1251  * Allocate a new hashtable entry if possible; return NULL if out of memory.
1252  * (Or, if the underlying space allocator throws error for out-of-memory,
1253  * we won't return at all.)
1254  */
1255 static HASHBUCKET
1256 get_hash_entry(HTAB *hashp, int freelist_idx)
1257 {
1258  HASHHDR *hctl = hashp->hctl;
1259  HASHBUCKET newElement;
1260 
1261  for (;;)
1262  {
1263  /* if partitioned, must lock to touch nentries and freeList */
1264  if (IS_PARTITIONED(hctl))
1265  SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1266 
1267  /* try to get an entry from the freelist */
1268  newElement = hctl->freeList[freelist_idx].freeList;
1269 
1270  if (newElement != NULL)
1271  break;
1272 
1273  if (IS_PARTITIONED(hctl))
1274  SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1275 
1276  /*
1277  * No free elements in this freelist. In a partitioned table, there
1278  * might be entries in other freelists, but to reduce contention we
1279  * prefer to first try to get another chunk of buckets from the main
1280  * shmem allocator. If that fails, though, we *MUST* root through all
1281  * the other freelists before giving up. There are multiple callers
1282  * that assume that they can allocate every element in the initially
1283  * requested table size, or that deleting an element guarantees they
1284  * can insert a new element, even if shared memory is entirely full.
1285  * Failing because the needed element is in a different freelist is
1286  * not acceptable.
1287  */
1288  if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
1289  {
1290  int borrow_from_idx;
1291 
1292  if (!IS_PARTITIONED(hctl))
1293  return NULL; /* out of memory */
1294 
1295  /* try to borrow element from another freelist */
1296  borrow_from_idx = freelist_idx;
1297  for (;;)
1298  {
1299  borrow_from_idx = (borrow_from_idx + 1) % NUM_FREELISTS;
1300  if (borrow_from_idx == freelist_idx)
1301  break; /* examined all freelists, fail */
1302 
1303  SpinLockAcquire(&(hctl->freeList[borrow_from_idx].mutex));
1304  newElement = hctl->freeList[borrow_from_idx].freeList;
1305 
1306  if (newElement != NULL)
1307  {
1308  hctl->freeList[borrow_from_idx].freeList = newElement->link;
1309  SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
1310 
1311  /* careful: count the new element in its proper freelist */
1312  SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1313  hctl->freeList[freelist_idx].nentries++;
1314  SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1315 
1316  return newElement;
1317  }
1318 
1319  SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
1320  }
1321 
1322  /* no elements available to borrow either, so out of memory */
1323  return NULL;
1324  }
1325  }
1326 
1327  /* remove entry from freelist, bump nentries */
1328  hctl->freeList[freelist_idx].freeList = newElement->link;
1329  hctl->freeList[freelist_idx].nentries++;
1330 
1331  if (IS_PARTITIONED(hctl))
1332  SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1333 
1334  return newElement;
1335 }
1336 
1337 /*
1338  * hash_get_num_entries -- get the number of entries in a hashtable
1339  */
1340 long
1342 {
1343  int i;
1344  long sum = hashp->hctl->freeList[0].nentries;
1345 
1346  /*
1347  * We currently don't bother with acquiring the mutexes; it's only
1348  * sensible to call this function if you've got lock on all partitions of
1349  * the table.
1350  */
1351  if (IS_PARTITIONED(hashp->hctl))
1352  {
1353  for (i = 1; i < NUM_FREELISTS; i++)
1354  sum += hashp->hctl->freeList[i].nentries;
1355  }
1356 
1357  return sum;
1358 }
1359 
1360 /*
1361  * hash_seq_init/_search/_term
1362  * Sequentially search through hash table and return
1363  * all the elements one by one, return NULL when no more.
1364  *
1365  * hash_seq_term should be called if and only if the scan is abandoned before
1366  * completion; if hash_seq_search returns NULL then it has already done the
1367  * end-of-scan cleanup.
1368  *
1369  * NOTE: caller may delete the returned element before continuing the scan.
1370  * However, deleting any other element while the scan is in progress is
1371  * UNDEFINED (it might be the one that curIndex is pointing at!). Also,
1372  * if elements are added to the table while the scan is in progress, it is
1373  * unspecified whether they will be visited by the scan or not.
1374  *
1375  * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
1376  * worry about hash_seq_term cleanup, if the hashtable is first locked against
1377  * further insertions by calling hash_freeze.
1378  *
1379  * NOTE: to use this with a partitioned hashtable, caller had better hold
1380  * at least shared lock on all partitions of the table throughout the scan!
1381  * We can cope with insertions or deletions by our own backend, but *not*
1382  * with concurrent insertions or deletions by another.
1383  */
1384 void
1386 {
1387  status->hashp = hashp;
1388  status->curBucket = 0;
1389  status->curEntry = NULL;
1390  status->hasHashvalue = false;
1391  if (!hashp->frozen)
1392  register_seq_scan(hashp);
1393 }
1394 
1395 /*
1396  * Same as above but scan by the given hash value.
1397  * See also hash_seq_search().
1398  *
1399  * NOTE: the default hash function doesn't match syscache hash function.
1400  * Thus, if you're going to use this function in syscache callback, make sure
1401  * you're using custom hash function. See relatt_cache_syshash()
1402  * for example.
1403  */
1404 void
1406  uint32 hashvalue)
1407 {
1408  HASHBUCKET *bucketPtr;
1409 
1410  hash_seq_init(status, hashp);
1411 
1412  status->hasHashvalue = true;
1413  status->hashvalue = hashvalue;
1414 
1415  status->curBucket = hash_initial_lookup(hashp, hashvalue, &bucketPtr);
1416  status->curEntry = *bucketPtr;
1417 }
1418 
1419 void *
1421 {
1422  HTAB *hashp;
1423  HASHHDR *hctl;
1424  uint32 max_bucket;
1425  long ssize;
1426  long segment_num;
1427  long segment_ndx;
1428  HASHSEGMENT segp;
1429  uint32 curBucket;
1430  HASHELEMENT *curElem;
1431 
1432  if (status->hasHashvalue)
1433  {
1434  /*
1435  * Scan entries only in the current bucket because only this bucket
1436  * can contain entries with the given hash value.
1437  */
1438  while ((curElem = status->curEntry) != NULL)
1439  {
1440  status->curEntry = curElem->link;
1441  if (status->hashvalue != curElem->hashvalue)
1442  continue;
1443  return (void *) ELEMENTKEY(curElem);
1444  }
1445 
1446  hash_seq_term(status);
1447  return NULL;
1448  }
1449 
1450  if ((curElem = status->curEntry) != NULL)
1451  {
1452  /* Continuing scan of curBucket... */
1453  status->curEntry = curElem->link;
1454  if (status->curEntry == NULL) /* end of this bucket */
1455  ++status->curBucket;
1456  return (void *) ELEMENTKEY(curElem);
1457  }
1458 
1459  /*
1460  * Search for next nonempty bucket starting at curBucket.
1461  */
1462  curBucket = status->curBucket;
1463  hashp = status->hashp;
1464  hctl = hashp->hctl;
1465  ssize = hashp->ssize;
1466  max_bucket = hctl->max_bucket;
1467 
1468  if (curBucket > max_bucket)
1469  {
1470  hash_seq_term(status);
1471  return NULL; /* search is done */
1472  }
1473 
1474  /*
1475  * first find the right segment in the table directory.
1476  */
1477  segment_num = curBucket >> hashp->sshift;
1478  segment_ndx = MOD(curBucket, ssize);
1479 
1480  segp = hashp->dir[segment_num];
1481 
1482  /*
1483  * Pick up the first item in this bucket's chain. If chain is not empty
1484  * we can begin searching it. Otherwise we have to advance to find the
1485  * next nonempty bucket. We try to optimize that case since searching a
1486  * near-empty hashtable has to iterate this loop a lot.
1487  */
1488  while ((curElem = segp[segment_ndx]) == NULL)
1489  {
1490  /* empty bucket, advance to next */
1491  if (++curBucket > max_bucket)
1492  {
1493  status->curBucket = curBucket;
1494  hash_seq_term(status);
1495  return NULL; /* search is done */
1496  }
1497  if (++segment_ndx >= ssize)
1498  {
1499  segment_num++;
1500  segment_ndx = 0;
1501  segp = hashp->dir[segment_num];
1502  }
1503  }
1504 
1505  /* Begin scan of curBucket... */
1506  status->curEntry = curElem->link;
1507  if (status->curEntry == NULL) /* end of this bucket */
1508  ++curBucket;
1509  status->curBucket = curBucket;
1510  return (void *) ELEMENTKEY(curElem);
1511 }
1512 
1513 void
1515 {
1516  if (!status->hashp->frozen)
1517  deregister_seq_scan(status->hashp);
1518 }
1519 
1520 /*
1521  * hash_freeze
1522  * Freeze a hashtable against future insertions (deletions are
1523  * still allowed)
1524  *
1525  * The reason for doing this is that by preventing any more bucket splits,
1526  * we no longer need to worry about registering hash_seq_search scans,
1527  * and thus caller need not be careful about ensuring hash_seq_term gets
1528  * called at the right times.
1529  *
1530  * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
1531  * with active scans (since hash_seq_term would then do the wrong thing).
1532  */
1533 void
1535 {
1536  if (hashp->isshared)
1537  elog(ERROR, "cannot freeze shared hashtable \"%s\"", hashp->tabname);
1538  if (!hashp->frozen && has_seq_scans(hashp))
1539  elog(ERROR, "cannot freeze hashtable \"%s\" because it has active scans",
1540  hashp->tabname);
1541  hashp->frozen = true;
1542 }
1543 
1544 
1545 /********************************* UTILITIES ************************/
1546 
1547 /*
1548  * Expand the table by adding one more hash bucket.
1549  */
1550 static bool
1552 {
1553  HASHHDR *hctl = hashp->hctl;
1554  HASHSEGMENT old_seg,
1555  new_seg;
1556  long old_bucket,
1557  new_bucket;
1558  long new_segnum,
1559  new_segndx;
1560  long old_segnum,
1561  old_segndx;
1562  HASHBUCKET *oldlink,
1563  *newlink;
1564  HASHBUCKET currElement,
1565  nextElement;
1566 
1567  Assert(!IS_PARTITIONED(hctl));
1568 
1569 #ifdef HASH_STATISTICS
1570  hash_expansions++;
1571 #endif
1572 
1573  new_bucket = hctl->max_bucket + 1;
1574  new_segnum = new_bucket >> hashp->sshift;
1575  new_segndx = MOD(new_bucket, hashp->ssize);
1576 
1577  if (new_segnum >= hctl->nsegs)
1578  {
1579  /* Allocate new segment if necessary -- could fail if dir full */
1580  if (new_segnum >= hctl->dsize)
1581  if (!dir_realloc(hashp))
1582  return false;
1583  if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
1584  return false;
1585  hctl->nsegs++;
1586  }
1587 
1588  /* OK, we created a new bucket */
1589  hctl->max_bucket++;
1590 
1591  /*
1592  * *Before* changing masks, find old bucket corresponding to same hash
1593  * values; values in that bucket may need to be relocated to new bucket.
1594  * Note that new_bucket is certainly larger than low_mask at this point,
1595  * so we can skip the first step of the regular hash mask calc.
1596  */
1597  old_bucket = (new_bucket & hctl->low_mask);
1598 
1599  /*
1600  * If we crossed a power of 2, readjust masks.
1601  */
1602  if ((uint32) new_bucket > hctl->high_mask)
1603  {
1604  hctl->low_mask = hctl->high_mask;
1605  hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
1606  }
1607 
1608  /*
1609  * Relocate records to the new bucket. NOTE: because of the way the hash
1610  * masking is done in calc_bucket, only one old bucket can need to be
1611  * split at this point. With a different way of reducing the hash value,
1612  * that might not be true!
1613  */
1614  old_segnum = old_bucket >> hashp->sshift;
1615  old_segndx = MOD(old_bucket, hashp->ssize);
1616 
1617  old_seg = hashp->dir[old_segnum];
1618  new_seg = hashp->dir[new_segnum];
1619 
1620  oldlink = &old_seg[old_segndx];
1621  newlink = &new_seg[new_segndx];
1622 
1623  for (currElement = *oldlink;
1624  currElement != NULL;
1625  currElement = nextElement)
1626  {
1627  nextElement = currElement->link;
1628  if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
1629  {
1630  *oldlink = currElement;
1631  oldlink = &currElement->link;
1632  }
1633  else
1634  {
1635  *newlink = currElement;
1636  newlink = &currElement->link;
1637  }
1638  }
1639  /* don't forget to terminate the rebuilt hash chains... */
1640  *oldlink = NULL;
1641  *newlink = NULL;
1642 
1643  return true;
1644 }
1645 
1646 
1647 static bool
1649 {
1650  HASHSEGMENT *p;
1651  HASHSEGMENT *old_p;
1652  long new_dsize;
1653  long old_dirsize;
1654  long new_dirsize;
1655 
1656  if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
1657  return false;
1658 
1659  /* Reallocate directory */
1660  new_dsize = hashp->hctl->dsize << 1;
1661  old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
1662  new_dirsize = new_dsize * sizeof(HASHSEGMENT);
1663 
1664  old_p = hashp->dir;
1665  CurrentDynaHashCxt = hashp->hcxt;
1666  p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);
1667 
1668  if (p != NULL)
1669  {
1670  memcpy(p, old_p, old_dirsize);
1671  MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
1672  hashp->dir = p;
1673  hashp->hctl->dsize = new_dsize;
1674 
1675  /* XXX assume the allocator is palloc, so we know how to free */
1676  Assert(hashp->alloc == DynaHashAlloc);
1677  pfree(old_p);
1678 
1679  return true;
1680  }
1681 
1682  return false;
1683 }
1684 
1685 
1686 static HASHSEGMENT
1688 {
1689  HASHSEGMENT segp;
1690 
1691  CurrentDynaHashCxt = hashp->hcxt;
1692  segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);
1693 
1694  if (!segp)
1695  return NULL;
1696 
1697  MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);
1698 
1699  return segp;
1700 }
1701 
1702 /*
1703  * allocate some new elements and link them into the indicated free list
1704  */
1705 static bool
1706 element_alloc(HTAB *hashp, int nelem, int freelist_idx)
1707 {
1708  HASHHDR *hctl = hashp->hctl;
1709  Size elementSize;
1710  HASHELEMENT *firstElement;
1711  HASHELEMENT *tmpElement;
1712  HASHELEMENT *prevElement;
1713  int i;
1714 
1715  if (hashp->isfixed)
1716  return false;
1717 
1718  /* Each element has a HASHELEMENT header plus user data. */
1719  elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
1720 
1721  CurrentDynaHashCxt = hashp->hcxt;
1722  firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
1723 
1724  if (!firstElement)
1725  return false;
1726 
1727  /* prepare to link all the new entries into the freelist */
1728  prevElement = NULL;
1729  tmpElement = firstElement;
1730  for (i = 0; i < nelem; i++)
1731  {
1732  tmpElement->link = prevElement;
1733  prevElement = tmpElement;
1734  tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
1735  }
1736 
1737  /* if partitioned, must lock to touch freeList */
1738  if (IS_PARTITIONED(hctl))
1739  SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1740 
1741  /* freelist could be nonempty if two backends did this concurrently */
1742  firstElement->link = hctl->freeList[freelist_idx].freeList;
1743  hctl->freeList[freelist_idx].freeList = prevElement;
1744 
1745  if (IS_PARTITIONED(hctl))
1746  SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1747 
1748  return true;
1749 }
1750 
1751 /*
1752  * Do initial lookup of a bucket for the given hash value, retrieving its
1753  * bucket number and its hash bucket.
1754  */
1755 static inline uint32
1756 hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr)
1757 {
1758  HASHHDR *hctl = hashp->hctl;
1759  HASHSEGMENT segp;
1760  long segment_num;
1761  long segment_ndx;
1762  uint32 bucket;
1763 
1764  bucket = calc_bucket(hctl, hashvalue);
1765 
1766  segment_num = bucket >> hashp->sshift;
1767  segment_ndx = MOD(bucket, hashp->ssize);
1768 
1769  segp = hashp->dir[segment_num];
1770 
1771  if (segp == NULL)
1772  hash_corrupted(hashp);
1773 
1774  *bucketptr = &segp[segment_ndx];
1775  return bucket;
1776 }
1777 
1778 /* complain when we have detected a corrupted hashtable */
1779 static void
1781 {
1782  /*
1783  * If the corruption is in a shared hashtable, we'd better force a
1784  * systemwide restart. Otherwise, just shut down this one backend.
1785  */
1786  if (hashp->isshared)
1787  elog(PANIC, "hash table \"%s\" corrupted", hashp->tabname);
1788  else
1789  elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname);
1790 }
1791 
/* calculate ceil(log base 2) of num */
int
my_log2(long num)
{
	/*
	 * Clamp too-large input, which would be invalid for pg_ceil_log2_*().
	 */
	const long	limit = LONG_MAX / 2;
	long		clamped = (num > limit) ? limit : num;

	/* pick the helper matching the width of long */
#if SIZEOF_LONG < 8
	return pg_ceil_log2_32(clamped);
#else
	return pg_ceil_log2_64(clamped);
#endif
}
1809 
/* calculate first power of 2 >= num, bounded to what will fit in a long */
static long
next_pow2_long(long num)
{
	/* my_log2's internal range check is sufficient */
	return 1L << my_log2(num);
}
1817 
/* calculate first power of 2 >= num, bounded to what will fit in an int */
static int
next_pow2_int(long num)
{
	/* cap the input first so that the shift below stays within an int */
	long		capped = (num > INT_MAX / 2) ? INT_MAX / 2 : num;

	return 1 << my_log2(capped);
}
1826 
1827 
1828 /************************* SEQ SCAN TRACKING ************************/
1829 
1830 /*
1831  * We track active hash_seq_search scans here. The need for this mechanism
1832  * comes from the fact that a scan will get confused if a bucket split occurs
1833  * while it's in progress: it might visit entries twice, or even miss some
1834  * entirely (if it's partway through the same bucket that splits). Hence
1835  * we want to inhibit bucket splits if there are any active scans on the
1836  * table being inserted into. This is a fairly rare case in current usage,
1837  * so just postponing the split until the next insertion seems sufficient.
1838  *
1839  * Given present usages of the function, only a few scans are likely to be
1840  * open concurrently; so a finite-size stack of open scans seems sufficient,
1841  * and we don't worry that linear search is too slow. Note that we do
1842  * allow multiple scans of the same hashtable to be open concurrently.
1843  *
1844  * This mechanism can support concurrent scan and insertion in a shared
1845  * hashtable if it's the same backend doing both. It would fail otherwise,
1846  * but locking reasons seem to preclude any such scenario anyway, so we don't
1847  * worry.
1848  *
1849  * This arrangement is reasonably robust if a transient hashtable is deleted
1850  * without notifying us. The absolute worst case is we might inhibit splits
1851  * in another table created later at exactly the same address. We will give
1852  * a warning at transaction end for reference leaks, so any bugs leading to
1853  * lack of notification should be easy to catch.
1854  */
1855 
/* capacity of the open-scan arrays below */
#define MAX_SEQ_SCANS 100

/*
 * Parallel arrays: slot i of each describes one registered scan.  Only the
 * first num_seq_scans slots are in use.
 */
static HTAB *seq_scan_tables[MAX_SEQ_SCANS];	/* tables being scanned */
static int	seq_scan_level[MAX_SEQ_SCANS];	/* subtransaction nest level */
static int	num_seq_scans = 0;
1861 
1862 
1863 /* Register a table as having an active hash_seq_search scan */
1864 static void
1866 {
1868  elog(ERROR, "too many active hash_seq_search scans, cannot start one on \"%s\"",
1869  hashp->tabname);
1870  seq_scan_tables[num_seq_scans] = hashp;
1872  num_seq_scans++;
1873 }
1874 
1875 /* Deregister an active scan */
1876 static void
1878 {
1879  int i;
1880 
1881  /* Search backward since it's most likely at the stack top */
1882  for (i = num_seq_scans - 1; i >= 0; i--)
1883  {
1884  if (seq_scan_tables[i] == hashp)
1885  {
1888  num_seq_scans--;
1889  return;
1890  }
1891  }
1892  elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
1893  hashp->tabname);
1894 }
1895 
1896 /* Check if a table has any active scan */
1897 static bool
1899 {
1900  int i;
1901 
1902  for (i = 0; i < num_seq_scans; i++)
1903  {
1904  if (seq_scan_tables[i] == hashp)
1905  return true;
1906  }
1907  return false;
1908 }
1909 
1910 /* Clean up any open scans at end of transaction */
1911 void
1912 AtEOXact_HashTables(bool isCommit)
1913 {
1914  /*
1915  * During abort cleanup, open scans are expected; just silently clean 'em
1916  * out. An open scan at commit means someone forgot a hash_seq_term()
1917  * call, so complain.
1918  *
1919  * Note: it's tempting to try to print the tabname here, but refrain for
1920  * fear of touching deallocated memory. This isn't a user-facing message
1921  * anyway, so it needn't be pretty.
1922  */
1923  if (isCommit)
1924  {
1925  int i;
1926 
1927  for (i = 0; i < num_seq_scans; i++)
1928  {
1929  elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1930  seq_scan_tables[i]);
1931  }
1932  }
1933  num_seq_scans = 0;
1934 }
1935 
1936 /* Clean up any open scans at end of subtransaction */
1937 void
1938 AtEOSubXact_HashTables(bool isCommit, int nestDepth)
1939 {
1940  int i;
1941 
1942  /*
1943  * Search backward to make cleanup easy. Note we must check all entries,
1944  * not only those at the end of the array, because deletion technique
1945  * doesn't keep them in order.
1946  */
1947  for (i = num_seq_scans - 1; i >= 0; i--)
1948  {
1949  if (seq_scan_level[i] >= nestDepth)
1950  {
1951  if (isCommit)
1952  elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1953  seq_scan_tables[i]);
1956  num_seq_scans--;
1957  }
1958  }
1959 }
unsigned int uint32
Definition: c.h:506
#define MAXALIGN(LEN)
Definition: c.h:811
#define Assert(condition)
Definition: c.h:858
#define pg_attribute_noreturn()
Definition: c.h:217
unsigned char bool
Definition: c.h:456
#define MemSet(start, val, len)
Definition: c.h:1020
void(* pg_funcptr_t)(void)
Definition: c.h:388
size_t Size
Definition: c.h:605
static HTAB * seq_scan_tables[MAX_SEQ_SCANS]
Definition: dynahash.c:1858
static int seq_scan_level[MAX_SEQ_SCANS]
Definition: dynahash.c:1859
void hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp, uint32 hashvalue)
Definition: dynahash.c:1405
#define ELEMENT_FROM_KEY(key)
Definition: dynahash.c:249
#define DEF_DIRSIZE
Definition: dynahash.c:125
static void * DynaHashAlloc(Size size)
Definition: dynahash.c:291
static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx)
Definition: dynahash.c:1706
void AtEOXact_HashTables(bool isCommit)
Definition: dynahash.c:1912
static bool init_htab(HTAB *hashp, long nelem)
Definition: dynahash.c:689
static HASHSEGMENT seg_alloc(HTAB *hashp)
Definition: dynahash.c:1687
#define MAX_SEQ_SCANS
Definition: dynahash.c:1856
static MemoryContext CurrentDynaHashCxt
Definition: dynahash.c:288
static int choose_nelem_alloc(Size entrysize)
Definition: dynahash.c:656
static int next_pow2_int(long num)
Definition: dynahash.c:1820
Size hash_get_shared_size(HASHCTL *info, int flags)
Definition: dynahash.c:854
static void register_seq_scan(HTAB *hashp)
Definition: dynahash.c:1865
#define MOD(x, y)
Definition: dynahash.c:255
#define IS_PARTITIONED(hctl)
Definition: dynahash.c:210
#define DEF_SEGSIZE_SHIFT
Definition: dynahash.c:124
void AtEOSubXact_HashTables(bool isCommit, int nestDepth)
Definition: dynahash.c:1938
static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx)
Definition: dynahash.c:1256
#define NUM_FREELISTS
Definition: dynahash.c:128
void hash_destroy(HTAB *hashp)
Definition: dynahash.c:865
static int string_compare(const char *key1, const char *key2, Size keysize)
Definition: dynahash.c:307
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
static bool expand_table(HTAB *hashp)
Definition: dynahash.c:1551
static void hdefault(HTAB *hashp)
Definition: dynahash.c:629
static void deregister_seq_scan(HTAB *hashp)
Definition: dynahash.c:1877
#define ELEMENTKEY(helem)
Definition: dynahash.c:244
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1514
#define DEF_SEGSIZE
Definition: dynahash.c:123
static int num_seq_scans
Definition: dynahash.c:1860
int my_log2(long num)
Definition: dynahash.c:1794
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
Definition: dynahash.c:352
#define FREELIST_IDX(hctl, hashcode)
Definition: dynahash.c:212
long hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1341
long hash_select_dirsize(long num_entries)
Definition: dynahash.c:830
Size hash_estimate_size(long num_entries, Size entrysize)
Definition: dynahash.c:783
void hash_stats(const char *where, HTAB *hashp)
Definition: dynahash.c:884
void hash_freeze(HTAB *hashp)
Definition: dynahash.c:1534
static bool dir_realloc(HTAB *hashp)
Definition: dynahash.c:1648
bool hash_update_hash_key(HTAB *hashp, void *existingEntry, const void *newKeyPtr)
Definition: dynahash.c:1145
static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr)
Definition: dynahash.c:1756
HASHELEMENT * HASHBUCKET
Definition: dynahash.c:131
uint32 get_hash_value(HTAB *hashp, const void *keyPtr)
Definition: dynahash.c:911
static uint32 calc_bucket(HASHHDR *hctl, uint32 hash_val)
Definition: dynahash.c:918
static bool has_seq_scans(HTAB *hashp)
Definition: dynahash.c:1898
static long next_pow2_long(long num)
Definition: dynahash.c:1812
void * hash_search_with_hash_value(HTAB *hashp, const void *keyPtr, uint32 hashvalue, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:968
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1420
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1385
static void hash_corrupted(HTAB *hashp) pg_attribute_noreturn()
Definition: dynahash.c:1780
HASHBUCKET * HASHSEGMENT
Definition: dynahash.c:134
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define FATAL
Definition: elog.h:41
#define WARNING
Definition: elog.h:36
#define PANIC
Definition: elog.h:42
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define MCXT_ALLOC_NO_OOM
Definition: fe_memutils.h:17
uint32 tag_hash(const void *key, Size keysize)
Definition: hashfn.c:677
uint32 uint32_hash(const void *key, Size keysize)
Definition: hashfn.c:688
uint32 string_hash(const void *key, Size keysize)
Definition: hashfn.c:660
#define HASH_KEYCOPY
Definition: hsearch.h:100
#define HASH_STRINGS
Definition: hsearch.h:96
int(* HashCompareFunc)(const void *key1, const void *key2, Size keysize)
Definition: hsearch.h:29
HASHACTION
Definition: hsearch.h:112
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
@ HASH_ENTER_NULL
Definition: hsearch.h:116
#define HASH_CONTEXT
Definition: hsearch.h:102
#define NO_MAX_DSIZE
Definition: hsearch.h:108
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_ALLOC
Definition: hsearch.h:101
#define HASH_DIRSIZE
Definition: hsearch.h:94
uint32(* HashValueFunc)(const void *key, Size keysize)
Definition: hsearch.h:21
void *(* HashAllocFunc)(Size request)
Definition: hsearch.h:44
#define HASH_SEGMENT
Definition: hsearch.h:93
#define HASH_ATTACH
Definition: hsearch.h:104
#define HASH_COMPARE
Definition: hsearch.h:99
struct HASHHDR HASHHDR
Definition: hsearch.h:58
#define HASH_FUNCTION
Definition: hsearch.h:98
#define HASH_BLOBS
Definition: hsearch.h:97
#define HASH_SHARED_MEM
Definition: hsearch.h:103
#define HASH_FIXED_SIZE
Definition: hsearch.h:105
#define HASH_PARTITION
Definition: hsearch.h:92
void *(* HashCopyFunc)(void *dest, const void *src, Size keysize)
Definition: hsearch.h:37
int i
Definition: isn.c:73
void pfree(void *pointer)
Definition: mcxt.c:1521
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void * MemoryContextAllocExtended(MemoryContext context, Size size, int flags)
Definition: mcxt.c:1238
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
void MemoryContextSetIdentifier(MemoryContext context, const char *id)
Definition: mcxt.c:612
#define MemoryContextIsValid(context)
Definition: memnodes.h:145
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
static uint64 pg_ceil_log2_64(uint64 num)
Definition: pg_bitutils.h:271
static uint32 pg_ceil_log2_32(uint32 num)
Definition: pg_bitutils.h:258
#define fprintf
Definition: port.h:242
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
Size add_size(Size s1, Size s2)
Definition: shmem.c:493
Size mul_size(Size s1, Size s2)
Definition: shmem.c:510
static pg_noinline void Size size
Definition: slab.c:607
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
slock_t mutex
Definition: dynahash.c:155
HASHELEMENT * freeList
Definition: dynahash.c:157
long nentries
Definition: dynahash.c:156
long ssize
Definition: hsearch.h:70
HashAllocFunc alloc
Definition: hsearch.h:84
Size keysize
Definition: hsearch.h:75
HashValueFunc hash
Definition: hsearch.h:78
Size entrysize
Definition: hsearch.h:76
long dsize
Definition: hsearch.h:72
HashCompareFunc match
Definition: hsearch.h:80
HASHHDR * hctl
Definition: hsearch.h:88
MemoryContext hcxt
Definition: hsearch.h:86
long num_partitions
Definition: hsearch.h:68
HashCopyFunc keycopy
Definition: hsearch.h:82
long max_dsize
Definition: hsearch.h:73
struct HASHELEMENT * link
Definition: hsearch.h:53
uint32 hashvalue
Definition: hsearch.h:54
long max_dsize
Definition: dynahash.c:194
long nsegs
Definition: dynahash.c:185
uint32 high_mask
Definition: dynahash.c:187
long num_partitions
Definition: dynahash.c:193
FreeListData freeList[NUM_FREELISTS]
Definition: dynahash.c:180
Size entrysize
Definition: dynahash.c:192
uint32 max_bucket
Definition: dynahash.c:186
Size keysize
Definition: dynahash.c:191
int nelem_alloc
Definition: dynahash.c:197
uint32 low_mask
Definition: dynahash.c:188
int sshift
Definition: dynahash.c:196
long ssize
Definition: dynahash.c:195
long dsize
Definition: dynahash.c:184
uint32 hashvalue
Definition: hsearch.h:126
HASHELEMENT * curEntry
Definition: hsearch.h:124
uint32 curBucket
Definition: hsearch.h:123
HTAB * hashp
Definition: hsearch.h:122
bool hasHashvalue
Definition: hsearch.h:125
Definition: dynahash.c:220
bool isfixed
Definition: dynahash.c:230
bool isshared
Definition: dynahash.c:229
HashCompareFunc match
Definition: dynahash.c:224
char * tabname
Definition: dynahash.c:228
HASHHDR * hctl
Definition: dynahash.c:221
MemoryContext hcxt
Definition: dynahash.c:227
HashAllocFunc alloc
Definition: dynahash.c:226
HashValueFunc hash
Definition: dynahash.c:223
long ssize
Definition: dynahash.c:237
HASHSEGMENT * dir
Definition: dynahash.c:222
Size keysize
Definition: dynahash.c:236
int sshift
Definition: dynahash.c:238
HashCopyFunc keycopy
Definition: dynahash.c:225
bool frozen
Definition: dynahash.c:233
int GetCurrentTransactionNestLevel(void)
Definition: xact.c:928