/*-------------------------------------------------------------------------
 *
 * dynahash.c
 *	  dynamic chained hash tables
 *
 * dynahash.c supports both local-to-a-backend hash tables and hash tables in
 * shared memory. For shared hash tables, it is the caller's responsibility
 * to provide appropriate access interlocking. The simplest convention is
 * that a single LWLock protects the whole hash table. Searches (HASH_FIND or
 * hash_seq_search) need only shared lock, but any update requires exclusive
 * lock. For heavily-used shared tables, the single-lock approach creates a
 * concurrency bottleneck, so we also support "partitioned" locking wherein
 * there are multiple LWLocks guarding distinct subsets of the table. To use
 * a hash table in partitioned mode, the HASH_PARTITION flag must be given
 * to hash_create. This prevents any attempt to split buckets on-the-fly.
 * Therefore, each hash bucket chain operates independently, and no fields
 * of the hash header change after init except nentries and freeList.
 * (A partitioned table uses multiple copies of those fields, guarded by
 * spinlocks, for additional concurrency.)
 * This lets any subset of the hash buckets be treated as a separately
 * lockable partition. We expect callers to use the low-order bits of a
 * lookup key's hash value as a partition number --- this will work because
 * of the way calc_bucket() maps hash values to bucket numbers.
 *
 * For hash tables in shared memory, the memory allocator function should
 * match malloc's semantics of returning NULL on failure. For hash tables
 * in local memory, we typically use palloc() which will throw error on
 * failure. The code in this file has to cope with both cases.
 *
 * dynahash.c provides support for these types of lookup keys:
 *
 * 1. Null-terminated C strings (truncated if necessary to fit in keysize),
 * compared as though by strcmp(). This is selected by specifying the
 * HASH_STRINGS flag to hash_create.
 *
 * 2. Arbitrary binary data of size keysize, compared as though by memcmp().
 * (Caller must ensure there are no undefined padding bits in the keys!)
 * This is selected by specifying the HASH_BLOBS flag to hash_create.
 *
 * 3. More complex key behavior can be selected by specifying user-supplied
 * hashing, comparison, and/or key-copying functions. At least a hashing
 * function must be supplied; comparison defaults to memcmp() and key copying
 * to memcpy() when a user-defined hashing function is selected.
 *
 * Compared to simplehash, dynahash has the following benefits:
 *
 * - It supports partitioning, which is useful for shared memory access using
 * locks.
 * - Shared memory hashes are allocated in a fixed size area at startup and
 * are discoverable by name from other processes.
 * - Because entries don't need to be moved in the case of hash conflicts,
 * dynahash has better performance for large entries.
 * - Guarantees stable pointers to entries.
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/hash/dynahash.c
 *
 *-------------------------------------------------------------------------
 */

/*
 * Original comments:
 *
 * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
 * Coded into C, with minor code improvements, and with hsearch(3) interface,
 * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
 * also, hcreate/hdestroy routines added to simulate hsearch(3).
 *
 * These routines simulate hsearch(3) and family, with the important
 * difference that the hash table is dynamic - can grow indefinitely
 * beyond its original size (as supplied to hcreate()).
 *
 * Performance appears to be comparable to that of hsearch(3).
 * The 'source-code' options referred to in hsearch(3)'s 'man' page
 * are not implemented; otherwise functionality is identical.
 *
 * Compilation controls:
 * HASH_DEBUG controls some informative traces, mainly for debugging.
 * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
 * when combined with HASH_DEBUG, these are displayed by hdestroy().
 *
 * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
 * concatenation property, in probably unnecessary code 'optimization'.
 *
 * Modified margo@postgres.berkeley.edu February 1990
 *		added multiple table interface
 * Modified by sullivan@postgres.berkeley.edu April 1990
 *		changed ctl structure for shared memory
 */

#include "postgres.h"

#include <limits.h>

#include "access/xact.h"
#include "common/hashfn.h"
#include "port/pg_bitutils.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "utils/dynahash.h"
#include "utils/memutils.h"


/*
 * Constants
 *
 * A hash table has a top-level "directory", each of whose entries points
 * to a "segment" of ssize bucket headers. The maximum number of hash
 * buckets is thus dsize * ssize (but dsize may be expansible). Of course,
 * the number of records in the table can be larger, but we don't want a
 * whole lot of records per bucket or performance goes down.
 *
 * In a hash table allocated in shared memory, the directory cannot be
 * expanded because it must stay at a fixed address. The directory size
 * should be selected using hash_select_dirsize (and you'd better have
 * a good idea of the maximum number of entries!). For non-shared hash
 * tables, the initial directory size can be left at the default.
 */
#define DEF_SEGSIZE 256
#define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */
#define DEF_DIRSIZE 256

/* Number of freelists to be used for a partitioned hash table. */
#define NUM_FREELISTS 32

/* A hash bucket is a linked list of HASHELEMENTs */
typedef HASHELEMENT *HASHBUCKET;

/* A hash segment is an array of bucket headers */
typedef HASHBUCKET *HASHSEGMENT;

/*
 * Per-freelist data.
 *
 * In a partitioned hash table, each freelist is associated with a specific
 * set of hashcodes, as determined by the FREELIST_IDX() macro below.
 * nentries tracks the number of live hashtable entries having those hashcodes
 * (NOT the number of entries in the freelist, as you might expect).
 *
 * The coverage of a freelist might be more or less than one partition, so it
 * needs its own lock rather than relying on caller locking. Relying on that
 * wouldn't work even if the coverage was the same, because of the occasional
 * need to "borrow" entries from another freelist; see get_hash_entry().
 *
 * Using an array of FreeListData instead of separate arrays of mutexes,
 * nentries and freeLists helps to reduce sharing of cache lines between
 * different mutexes.
 */
typedef struct
{
    slock_t mutex;              /* spinlock for this freelist */
    long nentries;              /* number of entries in associated buckets */
    HASHELEMENT *freeList;      /* chain of free elements */
} FreeListData;

/*
 * Header structure for a hash table --- contains all changeable info
 *
 * In a shared-memory hash table, the HASHHDR is in shared memory, while
 * each backend has a local HTAB struct. For a non-shared table, there isn't
 * any functional difference between HASHHDR and HTAB, but we separate them
 * anyway to share code between shared and non-shared tables.
 */
struct HASHHDR
{
    /*
     * The freelist can become a point of contention in high-concurrency hash
     * tables, so we use an array of freelists, each with its own mutex and
     * nentries count, instead of just a single one. Although the freelists
     * normally operate independently, we will scavenge entries from freelists
     * other than a hashcode's default freelist when necessary.
     *
     * If the hash table is not partitioned, only freeList[0] is used and its
     * spinlock is not used at all; callers' locking is assumed sufficient.
     */
    FreeListData freeList[NUM_FREELISTS];

    /* These fields can change, but not in a partitioned table */
    /* Also, dsize can't change in a shared table, even if unpartitioned */
    long dsize;                 /* directory size */
    long nsegs;                 /* number of allocated segments (<= dsize) */
    uint32 max_bucket;          /* ID of maximum bucket in use */
    uint32 high_mask;           /* mask to modulo into entire table */
    uint32 low_mask;            /* mask to modulo into lower half of table */

    /* These fields are fixed at hashtable creation */
    Size keysize;               /* hash key length in bytes */
    Size entrysize;             /* total user element size in bytes */
    long num_partitions;        /* # partitions (must be power of 2), or 0 */
    long max_dsize;             /* 'dsize' limit if directory is fixed size */
    long ssize;                 /* segment size --- must be power of 2 */
    int sshift;                 /* segment shift = log2(ssize) */
    int nelem_alloc;            /* number of entries to allocate at once */

#ifdef HASH_STATISTICS

    /*
     * Count statistics here. NB: stats code doesn't bother with mutex, so
     * counts could be corrupted a bit in a partitioned table.
     */
    long accesses;
    long collisions;
#endif
};

#define IS_PARTITIONED(hctl) ((hctl)->num_partitions != 0)

#define FREELIST_IDX(hctl, hashcode) \
    (IS_PARTITIONED(hctl) ? (hashcode) % NUM_FREELISTS : 0)

/*
 * Top control structure for a hashtable --- in a shared table, each backend
 * has its own copy (OK since no fields change at runtime)
 */
struct HTAB
{
    HASHHDR *hctl;              /* => shared control information */
    HASHSEGMENT *dir;           /* directory of segment starts */
    HashValueFunc hash;         /* hash function */
    HashCompareFunc match;      /* key comparison function */
    HashCopyFunc keycopy;       /* key copying function */
    HashAllocFunc alloc;        /* memory allocator */
    MemoryContext hcxt;         /* memory context if default allocator used */
    char *tabname;              /* table name (for error messages) */
    bool isshared;              /* true if table is in shared memory */
    bool isfixed;               /* if true, don't enlarge */

    /* freezing a shared table isn't allowed, so we can keep state here */
    bool frozen;                /* true = no more inserts allowed */

    /* We keep local copies of these fixed values to reduce contention */
    Size keysize;               /* hash key length in bytes */
    long ssize;                 /* segment size --- must be power of 2 */
    int sshift;                 /* segment shift = log2(ssize) */
};

/*
 * Key (also entry) part of a HASHELEMENT
 */
#define ELEMENTKEY(helem) (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))

/*
 * Obtain element pointer given pointer to key
 */
#define ELEMENT_FROM_KEY(key) \
    ((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))

/*
 * Fast MOD arithmetic, assuming that y is a power of 2 !
 */
#define MOD(x,y) ((x) & ((y)-1))

#ifdef HASH_STATISTICS
static long hash_accesses,
            hash_collisions,
            hash_expansions;
#endif

/*
 * Private function prototypes
 */
static void *DynaHashAlloc(Size size);
static HASHSEGMENT seg_alloc(HTAB *hashp);
static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
static bool dir_realloc(HTAB *hashp);
static bool expand_table(HTAB *hashp);
static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
static void hdefault(HTAB *hashp);
static int choose_nelem_alloc(Size entrysize);
static bool init_htab(HTAB *hashp, long nelem);
static void hash_corrupted(HTAB *hashp);
static long next_pow2_long(long num);
static int next_pow2_int(long num);
static void register_seq_scan(HTAB *hashp);
static void deregister_seq_scan(HTAB *hashp);
static bool has_seq_scans(HTAB *hashp);


/*
 * memory allocation support
 */
static MemoryContext CurrentDynaHashCxt = NULL;

static void *
DynaHashAlloc(Size size)
{
    Assert(MemoryContextIsValid(CurrentDynaHashCxt));
    return MemoryContextAlloc(CurrentDynaHashCxt, size);
}


/*
 * HashCompareFunc for string keys
 *
 * Because we copy keys with strlcpy(), they will be truncated at keysize-1
 * bytes, so we can only compare that many ... hence strncmp is almost but
 * not quite the right thing.
 */
static int
string_compare(const char *key1, const char *key2, Size keysize)
{
    return strncmp(key1, key2, keysize - 1);
}
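
/*
 * Illustrative example (not part of the original file): with keysize = 8,
 * the keycopy function stores at most 7 key bytes plus a terminating NUL,
 * so string_compare("abcdefg", "abcdefgh", 8) compares only the 7 stored
 * bytes and correctly treats both keys as equal after truncation.
 */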


/************************** CREATE ROUTINES **********************/

/*
 * hash_create -- create a new dynamic hash table
 *
 *	tabname: a name for the table (for debugging purposes)
 *	nelem: maximum number of elements expected
 *	*info: additional table parameters, as indicated by flags
 *	flags: bitmask indicating which parameters to take from *info
 *
 * The flags value *must* include HASH_ELEM. (Formerly, this was nominally
 * optional, but the default keysize and entrysize values were useless.)
 * The flags value must also include exactly one of HASH_STRINGS, HASH_BLOBS,
 * or HASH_FUNCTION, to define the key hashing semantics (C strings,
 * binary blobs, or custom, respectively). Callers specifying a custom
 * hash function will likely also want to use HASH_COMPARE, and perhaps
 * also HASH_KEYCOPY, to control key comparison and copying.
 * Another often-used flag is HASH_CONTEXT, to allocate the hash table
 * under info->hcxt rather than under TopMemoryContext; the default
 * behavior is only suitable for session-lifespan hash tables.
 * Other flags bits are special-purpose and seldom used, except for those
 * associated with shared-memory hash tables, for which see ShmemInitHash().
 *
 * Fields in *info are read only when the associated flags bit is set.
 * It is not necessary to initialize other fields of *info.
 * Neither tabname nor *info need persist after the hash_create() call.
 *
 * Note: It is deprecated for callers of hash_create() to explicitly specify
 * string_hash, tag_hash, uint32_hash, or oid_hash. Just set HASH_STRINGS or
 * HASH_BLOBS. Use HASH_FUNCTION only when you want something other than
 * one of these.
 *
 * Note: for a shared-memory hashtable, nelem needs to be a pretty good
 * estimate, since we can't expand the table on the fly. But an unshared
 * hashtable can be expanded on-the-fly, so it's better for nelem to be
 * on the small side and let the table grow if it's exceeded. An overly
 * large nelem will penalize hash_seq_search speed without buying much.
 */
HTAB *
hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
{
    HTAB *hashp;
    HASHHDR *hctl;

    /*
     * Hash tables now allocate space for key and data, but you have to say
     * how much space to allocate.
     */
    Assert(flags & HASH_ELEM);
    Assert(info->keysize > 0);
    Assert(info->entrysize >= info->keysize);

    /*
     * For shared hash tables, we have a local hash header (HTAB struct) that
     * we allocate in TopMemoryContext; all else is in shared memory.
     *
     * For non-shared hash tables, everything including the hash header is in
     * a memory context created specially for the hash table --- this makes
     * hash_destroy very simple. The memory context is made a child of either
     * a context specified by the caller, or TopMemoryContext if nothing is
     * specified.
     */
    if (flags & HASH_SHARED_MEM)
    {
        /* Set up to allocate the hash header */
        CurrentDynaHashCxt = TopMemoryContext;
    }
    else
    {
        /* Create the hash table's private memory context */
        if (flags & HASH_CONTEXT)
            CurrentDynaHashCxt = info->hcxt;
        else
            CurrentDynaHashCxt = TopMemoryContext;
        CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,
                                                   "dynahash",
                                                   ALLOCSET_DEFAULT_SIZES);
    }

    /* Initialize the hash header, plus a copy of the table name */
    hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1);
    MemSet(hashp, 0, sizeof(HTAB));

    hashp->tabname = (char *) (hashp + 1);
    strcpy(hashp->tabname, tabname);

    /* If we have a private context, label it with hashtable's name */
    if (!(flags & HASH_SHARED_MEM))
        MemoryContextSetIdentifier(CurrentDynaHashCxt, hashp->tabname);

    /*
     * Select the appropriate hash function (see comments at head of file).
     */
    if (flags & HASH_FUNCTION)
    {
        Assert(!(flags & (HASH_BLOBS | HASH_STRINGS)));
        hashp->hash = info->hash;
    }
    else if (flags & HASH_BLOBS)
    {
        Assert(!(flags & HASH_STRINGS));
        /* We can optimize hashing for common key sizes */
        if (info->keysize == sizeof(uint32))
            hashp->hash = uint32_hash;
        else
            hashp->hash = tag_hash;
    }
    else
    {
        /*
         * string_hash used to be considered the default hash method, and in a
         * non-assert build it effectively still is. But we now consider it
         * an assertion error to not say HASH_STRINGS explicitly. To help
         * catch mistaken usage of HASH_STRINGS, we also insist on a
         * reasonably long string length: if the keysize is only 4 or 8 bytes,
         * it's almost certainly an integer or pointer not a string.
         */
        Assert(flags & HASH_STRINGS);
        Assert(info->keysize > 8);

        hashp->hash = string_hash;
    }

    /*
     * If you don't specify a match function, it defaults to string_compare if
     * you used string_hash, and to memcmp otherwise.
     *
     * Note: explicitly specifying string_hash is deprecated, because this
     * might not work for callers in loadable modules on some platforms due to
     * referencing a trampoline instead of the string_hash function proper.
     * Specify HASH_STRINGS instead.
     */
    if (flags & HASH_COMPARE)
        hashp->match = info->match;
    else if (hashp->hash == string_hash)
        hashp->match = (HashCompareFunc) string_compare;
    else
        hashp->match = memcmp;

    /*
     * Similarly, the key-copying function defaults to strlcpy or memcpy.
     */
    if (flags & HASH_KEYCOPY)
        hashp->keycopy = info->keycopy;
    else if (hashp->hash == string_hash)
    {
        /*
         * The signature of keycopy is meant for memcpy(), which returns
         * void*, but strlcpy() returns size_t. Since we never use the return
         * value of keycopy, and size_t is pretty much always the same size as
         * void *, this should be safe. The extra cast in the middle is to
         * avoid warnings from -Wcast-function-type.
         */
        hashp->keycopy = (HashCopyFunc) (pg_funcptr_t) strlcpy;
    }
    else
        hashp->keycopy = memcpy;

    /* And select the entry allocation function, too. */
    if (flags & HASH_ALLOC)
        hashp->alloc = info->alloc;
    else
        hashp->alloc = DynaHashAlloc;

    if (flags & HASH_SHARED_MEM)
    {
        /*
         * ctl structure and directory are preallocated for shared memory
         * tables. Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
         * well.
         */
        hashp->hctl = info->hctl;
        hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
        hashp->hcxt = NULL;
        hashp->isshared = true;

        /* hash table already exists, we're just attaching to it */
        if (flags & HASH_ATTACH)
        {
            /* make local copies of some heavily-used values */
            hctl = hashp->hctl;
            hashp->keysize = hctl->keysize;
            hashp->ssize = hctl->ssize;
            hashp->sshift = hctl->sshift;

            return hashp;
        }
    }
    else
    {
        /* setup hash table defaults */
        hashp->hctl = NULL;
        hashp->dir = NULL;
        hashp->hcxt = CurrentDynaHashCxt;
        hashp->isshared = false;
    }

    if (!hashp->hctl)
    {
        hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
        if (!hashp->hctl)
            ereport(ERROR,
                    (errcode(ERRCODE_OUT_OF_MEMORY),
                     errmsg("out of memory")));
    }

    hashp->frozen = false;

    hdefault(hashp);

    hctl = hashp->hctl;

    if (flags & HASH_PARTITION)
    {
        /* Doesn't make sense to partition a local hash table */
        Assert(flags & HASH_SHARED_MEM);

        /*
         * The number of partitions had better be a power of 2. Also, it must
         * be less than INT_MAX (see init_htab()), so call the int version of
         * next_pow2.
         */
        Assert(info->num_partitions == next_pow2_int(info->num_partitions));

        hctl->num_partitions = info->num_partitions;
    }

    if (flags & HASH_SEGMENT)
    {
        hctl->ssize = info->ssize;
        hctl->sshift = my_log2(info->ssize);
        /* ssize had better be a power of 2 */
        Assert(hctl->ssize == (1L << hctl->sshift));
    }

    /*
     * SHM hash tables have fixed directory size passed by the caller.
     */
    if (flags & HASH_DIRSIZE)
    {
        hctl->max_dsize = info->max_dsize;
        hctl->dsize = info->dsize;
    }

    /* remember the entry sizes, too */
    hctl->keysize = info->keysize;
    hctl->entrysize = info->entrysize;

    /* make local copies of heavily-used constant fields */
    hashp->keysize = hctl->keysize;
    hashp->ssize = hctl->ssize;
    hashp->sshift = hctl->sshift;

    /* Build the hash directory structure */
    if (!init_htab(hashp, nelem))
        elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);

    /*
     * For a shared hash table, preallocate the requested number of elements.
     * This reduces problems with run-time out-of-shared-memory conditions.
     *
     * For a non-shared hash table, preallocate the requested number of
     * elements if it's less than our chosen nelem_alloc. This avoids wasting
     * space if the caller correctly estimates a small table size.
     */
    if ((flags & HASH_SHARED_MEM) ||
        nelem < hctl->nelem_alloc)
    {
        int i,
            freelist_partitions,
            nelem_alloc,
            nelem_alloc_first;

        /*
         * If hash table is partitioned, give each freelist an equal share of
         * the initial allocation. Otherwise only freeList[0] is used.
         */
        if (IS_PARTITIONED(hashp->hctl))
            freelist_partitions = NUM_FREELISTS;
        else
            freelist_partitions = 1;

        nelem_alloc = nelem / freelist_partitions;
        if (nelem_alloc <= 0)
            nelem_alloc = 1;

        /*
         * Make sure we'll allocate all the requested elements; freeList[0]
         * gets the excess if the request isn't divisible by NUM_FREELISTS.
         */
        if (nelem_alloc * freelist_partitions < nelem)
            nelem_alloc_first =
                nelem - nelem_alloc * (freelist_partitions - 1);
        else
            nelem_alloc_first = nelem_alloc;

        for (i = 0; i < freelist_partitions; i++)
        {
            int temp = (i == 0) ? nelem_alloc_first : nelem_alloc;

            if (!element_alloc(hashp, temp, i))
                ereport(ERROR,
                        (errcode(ERRCODE_OUT_OF_MEMORY),
                         errmsg("out of memory")));
        }
    }

    if (flags & HASH_FIXED_SIZE)
        hashp->isfixed = true;
    return hashp;
}
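
/*
 * A minimal usage sketch (not part of the original file): creating a
 * backend-local table keyed by Oid, per the rules above. "MyCacheEntry"
 * and "my cache" are hypothetical names; the first field of the entry
 * struct must be the key.
 */
#ifdef NOT_USED
typedef struct MyCacheEntry
{
    Oid key;                    /* hash key; MUST BE FIRST */
    int hit_count;              /* caller's payload follows the key */
} MyCacheEntry;

static HTAB *
make_my_cache(void)
{
    HASHCTL ctl;

    ctl.keysize = sizeof(Oid);
    ctl.entrysize = sizeof(MyCacheEntry);
    ctl.hcxt = CurrentMemoryContext;

    /* HASH_BLOBS: binary key; HASH_CONTEXT: allocate under ctl.hcxt */
    return hash_create("my cache", 128, &ctl,
                       HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
}
#endif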

/*
 * Set default HASHHDR parameters.
 */
static void
hdefault(HTAB *hashp)
{
    HASHHDR *hctl = hashp->hctl;

    MemSet(hctl, 0, sizeof(HASHHDR));

    hctl->dsize = DEF_DIRSIZE;
    hctl->nsegs = 0;

    hctl->num_partitions = 0;   /* not partitioned */

    /* table has no fixed maximum size */
    hctl->max_dsize = NO_MAX_DSIZE;

    hctl->ssize = DEF_SEGSIZE;
    hctl->sshift = DEF_SEGSIZE_SHIFT;

#ifdef HASH_STATISTICS
    hctl->accesses = hctl->collisions = 0;
#endif
}

/*
 * Given the user-specified entry size, choose nelem_alloc, ie, how many
 * elements to add to the hash table when we need more.
 */
static int
choose_nelem_alloc(Size entrysize)
{
    int nelem_alloc;
    Size elementSize;
    Size allocSize;

    /* Each element has a HASHELEMENT header plus user data. */
    /* NB: this had better match element_alloc() */
    elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);

    /*
     * The idea here is to choose nelem_alloc at least 32, but round up so
     * that the allocation request will be a power of 2 or just less. This
     * makes little difference for hash tables in shared memory, but for hash
     * tables managed by palloc, the allocation request will be rounded up to
     * a power of 2 anyway. If we fail to take this into account, we'll waste
     * as much as half the allocated space.
     */
    allocSize = 32 * 4;         /* assume elementSize at least 8 */
    do
    {
        allocSize <<= 1;
        nelem_alloc = allocSize / elementSize;
    } while (nelem_alloc < 32);

    return nelem_alloc;
}
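
/*
 * Worked example (added for illustration): with entrysize = 40 on a
 * 64-bit machine, elementSize = MAXALIGN(sizeof(HASHELEMENT)) +
 * MAXALIGN(40) = 16 + 40 = 56. The loop doubles allocSize starting from
 * 128: 256/56 = 4, 512/56 = 9, 1024/56 = 18, 2048/56 = 36 >= 32, so
 * nelem_alloc = 36 and each element_alloc request is 36 * 56 = 2016
 * bytes, just under the 2048-byte power of 2 that palloc would round
 * the request up to.
 */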

/*
 * Compute derived fields of hctl and build the initial directory/segment
 * arrays
 */
static bool
init_htab(HTAB *hashp, long nelem)
{
    HASHHDR *hctl = hashp->hctl;
    HASHSEGMENT *segp;
    int nbuckets;
    int nsegs;
    int i;

    /*
     * initialize mutexes if it's a partitioned table
     */
    if (IS_PARTITIONED(hctl))
        for (i = 0; i < NUM_FREELISTS; i++)
            SpinLockInit(&(hctl->freeList[i].mutex));

    /*
     * Allocate space for the next greater power of two number of buckets,
     * assuming a desired maximum load factor of 1.
     */
    nbuckets = next_pow2_int(nelem);

    /*
     * In a partitioned table, nbuckets must be at least equal to
     * num_partitions; were it less, keys with apparently different partition
     * numbers would map to the same bucket, breaking partition independence.
     * (Normally nbuckets will be much bigger; this is just a safety check.)
     */
    while (nbuckets < hctl->num_partitions)
        nbuckets <<= 1;

    hctl->max_bucket = hctl->low_mask = nbuckets - 1;
    hctl->high_mask = (nbuckets << 1) - 1;

    /*
     * Figure number of directory segments needed, round up to a power of 2
     */
    nsegs = (nbuckets - 1) / hctl->ssize + 1;
    nsegs = next_pow2_int(nsegs);

    /*
     * Make sure directory is big enough. If pre-allocated directory is too
     * small, choke (caller screwed up).
     */
    if (nsegs > hctl->dsize)
    {
        if (!(hashp->dir))
            hctl->dsize = nsegs;
        else
            return false;
    }

    /* Allocate a directory */
    if (!(hashp->dir))
    {
        CurrentDynaHashCxt = hashp->hcxt;
        hashp->dir = (HASHSEGMENT *)
            hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
        if (!hashp->dir)
            return false;
    }

    /* Allocate initial segments */
    for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
    {
        *segp = seg_alloc(hashp);
        if (*segp == NULL)
            return false;
    }

    /* Choose number of entries to allocate at a time */
    hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);

#ifdef HASH_DEBUG
    fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%u\n%s%x\n%s%x\n%s%ld\n",
            "TABLE POINTER   ", hashp,
            "DIRECTORY SIZE  ", hctl->dsize,
            "SEGMENT SIZE    ", hctl->ssize,
            "SEGMENT SHIFT   ", hctl->sshift,
            "MAX BUCKET      ", hctl->max_bucket,
            "HIGH MASK       ", hctl->high_mask,
            "LOW MASK        ", hctl->low_mask,
            "NSEGS           ", hctl->nsegs);
#endif
    return true;
}

/*
 * Estimate the space needed for a hashtable containing the given number
 * of entries of given size.
 * NOTE: this is used to estimate the footprint of hashtables in shared
 * memory; therefore it does not count HTAB which is in local memory.
 * NB: assumes that all hash structure parameters have default values!
 */
Size
hash_estimate_size(long num_entries, Size entrysize)
{
    Size size;
    long nBuckets,
         nSegments,
         nDirEntries,
         nElementAllocs,
         elementSize,
         elementAllocCnt;

    /* estimate number of buckets wanted */
    nBuckets = next_pow2_long(num_entries);
    /* # of segments needed for nBuckets */
    nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
    /* directory entries */
    nDirEntries = DEF_DIRSIZE;
    while (nDirEntries < nSegments)
        nDirEntries <<= 1;      /* dir_alloc doubles dsize at each call */

    /* fixed control info */
    size = MAXALIGN(sizeof(HASHHDR));   /* but not HTAB, per above */
    /* directory */
    size = add_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
    /* segments */
    size = add_size(size, mul_size(nSegments,
                                   MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
    /* elements --- allocated in groups of choose_nelem_alloc() entries */
    elementAllocCnt = choose_nelem_alloc(entrysize);
    nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
    elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
    size = add_size(size,
                    mul_size(nElementAllocs,
                             mul_size(elementAllocCnt, elementSize)));

    return size;
}
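
/*
 * A hedged usage sketch (not in the original file): a shared-memory
 * consumer's size-estimation hook might accumulate its table's footprint
 * like this. "MAX_MY_ENTRIES" and "MyEntry" are hypothetical.
 */
#ifdef NOT_USED
Size
MyShmemSize(void)
{
    Size size = 0;

    size = add_size(size, hash_estimate_size(MAX_MY_ENTRIES,
                                             sizeof(MyEntry)));
    return size;
}
#endif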

/*
 * Select an appropriate directory size for a hashtable with the given
 * maximum number of entries.
 * This is only needed for hashtables in shared memory, whose directories
 * cannot be expanded dynamically.
 * NB: assumes that all hash structure parameters have default values!
 *
 * XXX this had better agree with the behavior of init_htab()...
 */
long
hash_select_dirsize(long num_entries)
{
    long nBuckets,
         nSegments,
         nDirEntries;

    /* estimate number of buckets wanted */
    nBuckets = next_pow2_long(num_entries);
    /* # of segments needed for nBuckets */
    nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
    /* directory entries */
    nDirEntries = DEF_DIRSIZE;
    while (nDirEntries < nSegments)
        nDirEntries <<= 1;      /* dir_alloc doubles dsize at each call */

    return nDirEntries;
}

/*
 * Compute the required initial memory allocation for a shared-memory
 * hashtable with the given parameters. We need space for the HASHHDR
 * and for the (non expansible) directory.
 */
Size
hash_get_shared_size(HASHCTL *info, int flags)
{
    Assert(flags & HASH_DIRSIZE);
    Assert(info->dsize == info->max_dsize);
    return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
}


/********************** DESTROY ROUTINES ************************/

void
hash_destroy(HTAB *hashp)
{
    if (hashp != NULL)
    {
        /* allocation method must be one we know how to free, too */
        Assert(hashp->alloc == DynaHashAlloc);
        /* so this hashtable must have its own context */
        Assert(hashp->hcxt != NULL);

        hash_stats("destroy", hashp);

        /*
         * Free everything by destroying the hash table's memory context.
         */
        MemoryContextDelete(hashp->hcxt);
    }
}

void
hash_stats(const char *where, HTAB *hashp)
{
#ifdef HASH_STATISTICS
    fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
            where, hashp->hctl->accesses, hashp->hctl->collisions);

    fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
            hash_get_num_entries(hashp), (long) hashp->hctl->keysize,
            hashp->hctl->max_bucket, hashp->hctl->nsegs);
    fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
            where, hash_accesses, hash_collisions);
    fprintf(stderr, "hash_stats: total expansions %ld\n",
            hash_expansions);
#endif
}

/*******************************SEARCH ROUTINES *****************************/


/*
 * get_hash_value -- exported routine to calculate a key's hash value
 *
 * We export this because for partitioned tables, callers need to compute
 * the partition number (from the low-order bits of the hash value) before
 * searching.
 */
uint32
get_hash_value(HTAB *hashp, const void *keyPtr)
{
    return hashp->hash(keyPtr, hashp->keysize);
}
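
/*
 * Sketch of the intended partitioned-locking pattern (illustrative
 * fragment, not in the original file; "MyPartitionLock" and
 * NUM_MY_PARTITIONS are hypothetical): compute the hash once, derive the
 * partition from its low-order bits, lock that partition, then search
 * with the precomputed value.
 */
#ifdef NOT_USED
    uint32 hashcode;
    int partition;
    void *entry;

    hashcode = get_hash_value(hashp, &key);
    partition = hashcode % NUM_MY_PARTITIONS;   /* power of 2, so this
                                                 * takes low-order bits */
    LWLockAcquire(MyPartitionLock(partition), LW_SHARED);
    entry = hash_search_with_hash_value(hashp, &key, hashcode,
                                        HASH_FIND, NULL);
    LWLockRelease(MyPartitionLock(partition));
#endif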

/* Convert a hash value to a bucket number */
static inline uint32
calc_bucket(HASHHDR *hctl, uint32 hash_val)
{
    uint32 bucket;

    bucket = hash_val & hctl->high_mask;
    if (bucket > hctl->max_bucket)
        bucket = bucket & hctl->low_mask;

    return bucket;
}
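
/*
 * Worked example (added for illustration): after two bucket splits from
 * four buckets, max_bucket = 5, low_mask = 3, high_mask = 7. A hash value
 * of 13 yields 13 & 7 = 5, a valid bucket. A hash value of 14 yields
 * 14 & 7 = 6 > max_bucket, so it is masked again: 6 & 3 = 2. Once buckets
 * 6 and 7 have been created, the same keys map to them directly --- the
 * incremental splitting of linear hashing.
 */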

/*
 * hash_search -- look up key in table and perform action
 * hash_search_with_hash_value -- same, with key's hash value already computed
 *
 * action is one of:
 *		HASH_FIND: look up key in table
 *		HASH_ENTER: look up key in table, creating entry if not present
 *		HASH_ENTER_NULL: same, but return NULL if out of memory
 *		HASH_REMOVE: look up key in table, remove entry if present
 *
 * Return value is a pointer to the element found/entered/removed if any,
 * or NULL if no match was found. (NB: in the case of the REMOVE action,
 * the result is a dangling pointer that shouldn't be dereferenced!)
 *
 * HASH_ENTER will normally ereport a generic "out of memory" error if
 * it is unable to create a new entry. The HASH_ENTER_NULL operation is
 * the same except it will return NULL if out of memory. Note that
 * HASH_ENTER_NULL cannot be used with the default palloc-based allocator,
 * since palloc internally ereports on out-of-memory.
 *
 * If foundPtr isn't NULL, then *foundPtr is set true if we found an
 * existing entry in the table, false otherwise. This is needed in the
 * HASH_ENTER case, but is redundant with the return value otherwise.
 *
 * For hash_search_with_hash_value, the hashvalue parameter must have been
 * calculated with get_hash_value().
 */
void *
hash_search(HTAB *hashp,
            const void *keyPtr,
            HASHACTION action,
            bool *foundPtr)
{
    return hash_search_with_hash_value(hashp,
                                       keyPtr,
                                       hashp->hash(keyPtr, hashp->keysize),
                                       action,
                                       foundPtr);
}
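
/*
 * Typical insert-or-find pattern (illustrative fragment, not in the
 * original file; "MyCacheEntry" is a hypothetical entry type): HASH_ENTER
 * returns an existing entry or a new one, and *foundPtr tells the caller
 * whether the new entry's payload still needs initialization.
 */
#ifdef NOT_USED
    MyCacheEntry *entry;
    bool found;

    entry = (MyCacheEntry *) hash_search(hashp, &key, HASH_ENTER, &found);
    if (!found)
    {
        /* new entry: the key is already filled in; set up the payload */
        entry->hit_count = 0;
    }
    entry->hit_count++;
#endif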

void *
hash_search_with_hash_value(HTAB *hashp,
                            const void *keyPtr,
                            uint32 hashvalue,
                            HASHACTION action,
                            bool *foundPtr)
{
    HASHHDR *hctl = hashp->hctl;
    int freelist_idx = FREELIST_IDX(hctl, hashvalue);
    Size keysize;
    uint32 bucket;
    long segment_num;
    long segment_ndx;
    HASHSEGMENT segp;
    HASHBUCKET currBucket;
    HASHBUCKET *prevBucketPtr;
    HashCompareFunc match;

#ifdef HASH_STATISTICS
    hash_accesses++;
    hctl->accesses++;
#endif

    /*
     * If inserting, check if it is time to split a bucket.
     *
     * NOTE: failure to expand table is not a fatal error, it just means we
     * have to run at higher fill factor than we wanted. However, if we're
     * using the palloc allocator then it will throw error anyway on
     * out-of-memory, so we must do this before modifying the table.
     */
    if (action == HASH_ENTER || action == HASH_ENTER_NULL)
    {
        /*
         * Can't split if running in partitioned mode, nor if frozen, nor if
         * table is the subject of any active hash_seq_search scans.
         */
        if (hctl->freeList[0].nentries > (long) hctl->max_bucket &&
            !IS_PARTITIONED(hctl) && !hashp->frozen &&
            !has_seq_scans(hashp))
            (void) expand_table(hashp);
    }

    /*
     * Do the initial lookup
     */
    bucket = calc_bucket(hctl, hashvalue);

    segment_num = bucket >> hashp->sshift;
    segment_ndx = MOD(bucket, hashp->ssize);

    segp = hashp->dir[segment_num];

    if (segp == NULL)
        hash_corrupted(hashp);

    prevBucketPtr = &segp[segment_ndx];
    currBucket = *prevBucketPtr;

    /*
     * Follow collision chain looking for matching key
     */
    match = hashp->match;       /* save one fetch in inner loop */
    keysize = hashp->keysize;   /* ditto */

    while (currBucket != NULL)
    {
        if (currBucket->hashvalue == hashvalue &&
            match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
            break;
        prevBucketPtr = &(currBucket->link);
        currBucket = *prevBucketPtr;
#ifdef HASH_STATISTICS
        hash_collisions++;
        hctl->collisions++;
#endif
    }

    if (foundPtr)
        *foundPtr = (bool) (currBucket != NULL);

    /*
     * OK, now what?
     */
    switch (action)
    {
        case HASH_FIND:
            if (currBucket != NULL)
                return (void *) ELEMENTKEY(currBucket);
            return NULL;

        case HASH_REMOVE:
            if (currBucket != NULL)
            {
                /* if partitioned, must lock to touch nentries and freeList */
                if (IS_PARTITIONED(hctl))
                    SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));

                /* delete the record from the appropriate nentries counter. */
                Assert(hctl->freeList[freelist_idx].nentries > 0);
                hctl->freeList[freelist_idx].nentries--;

                /* remove record from hash bucket's chain. */
                *prevBucketPtr = currBucket->link;

                /* add the record to the appropriate freelist. */
                currBucket->link = hctl->freeList[freelist_idx].freeList;
                hctl->freeList[freelist_idx].freeList = currBucket;

                if (IS_PARTITIONED(hctl))
                    SpinLockRelease(&hctl->freeList[freelist_idx].mutex);

                /*
                 * better hope the caller is synchronizing access to this
                 * element, because someone else is going to reuse it the next
                 * time something is added to the table
                 */
                return (void *) ELEMENTKEY(currBucket);
            }
            return NULL;

        case HASH_ENTER_NULL:
            /* ENTER_NULL does not work with palloc-based allocator */
            Assert(hashp->alloc != DynaHashAlloc);
            /* FALL THRU */

        case HASH_ENTER:
            /* Return existing element if found, else create one */
            if (currBucket != NULL)
                return (void *) ELEMENTKEY(currBucket);

            /* disallow inserts if frozen */
            if (hashp->frozen)
                elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
                     hashp->tabname);

            currBucket = get_hash_entry(hashp, freelist_idx);
            if (currBucket == NULL)
            {
                /* out of memory */
                if (action == HASH_ENTER_NULL)
                    return NULL;
                /* report a generic message */
                if (hashp->isshared)
                    ereport(ERROR,
                            (errcode(ERRCODE_OUT_OF_MEMORY),
                             errmsg("out of shared memory")));
                else
                    ereport(ERROR,
                            (errcode(ERRCODE_OUT_OF_MEMORY),
                             errmsg("out of memory")));
            }

            /* link into hashbucket chain */
            *prevBucketPtr = currBucket;
            currBucket->link = NULL;

            /* copy key into record */
            currBucket->hashvalue = hashvalue;
            hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);

            /*
             * Caller is expected to fill the data field on return. DO NOT
             * insert any code that could possibly throw error here, as doing
             * so would leave the table entry incomplete and hence corrupt the
             * caller's data structure.
             */

            return (void *) ELEMENTKEY(currBucket);
    }

    elog(ERROR, "unrecognized hash action code: %d", (int) action);

    return NULL;                /* keep compiler quiet */
}

/*
 * hash_update_hash_key -- change the hash key of an existing table entry
 *
 * This is equivalent to removing the entry, making a new entry, and copying
 * over its data, except that the entry never goes to the table's freelist.
 * Therefore this cannot suffer an out-of-memory failure, even if there are
 * other processes operating in other partitions of the hashtable.
 *
 * Returns true if successful, false if the requested new hash key is already
 * present. Throws error if the specified entry pointer isn't actually a
 * table member.
 *
 * NB: currently, there is no special case for old and new hash keys being
 * identical, which means we'll report false for that situation. This is
 * preferable for existing uses.
 *
 * NB: for a partitioned hashtable, caller must hold lock on both relevant
 * partitions, if the new hash key would belong to a different partition.
 */
bool
hash_update_hash_key(HTAB *hashp,
                     void *existingEntry,
                     const void *newKeyPtr)
{
    HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
    HASHHDR *hctl = hashp->hctl;
    uint32 newhashvalue;
    Size keysize;
    uint32 bucket;
    uint32 newbucket;
    long segment_num;
    long segment_ndx;
    HASHSEGMENT segp;
    HASHBUCKET currBucket;
    HASHBUCKET *prevBucketPtr;
    HASHBUCKET *oldPrevPtr;
    HashCompareFunc match;

#ifdef HASH_STATISTICS
    hash_accesses++;
    hctl->accesses++;
#endif

    /* disallow updates if frozen */
    if (hashp->frozen)
        elog(ERROR, "cannot update in frozen hashtable \"%s\"",
             hashp->tabname);

    /*
     * Lookup the existing element using its saved hash value. We need to do
     * this to be able to unlink it from its hash chain, but as a side benefit
     * we can verify the validity of the passed existingEntry pointer.
     */
    bucket = calc_bucket(hctl, existingElement->hashvalue);

    segment_num = bucket >> hashp->sshift;
    segment_ndx = MOD(bucket, hashp->ssize);

    segp = hashp->dir[segment_num];

    if (segp == NULL)
        hash_corrupted(hashp);

    prevBucketPtr = &segp[segment_ndx];
    currBucket = *prevBucketPtr;

    while (currBucket != NULL)
    {
        if (currBucket == existingElement)
            break;
        prevBucketPtr = &(currBucket->link);
        currBucket = *prevBucketPtr;
    }

    if (currBucket == NULL)
        elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
             hashp->tabname);

    oldPrevPtr = prevBucketPtr;

    /*
     * Now perform the equivalent of a HASH_ENTER operation to locate the hash
     * chain we want to put the entry into.
     */
    newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);

    newbucket = calc_bucket(hctl, newhashvalue);

    segment_num = newbucket >> hashp->sshift;
    segment_ndx = MOD(newbucket, hashp->ssize);

    segp = hashp->dir[segment_num];

    if (segp == NULL)
        hash_corrupted(hashp);

    prevBucketPtr = &segp[segment_ndx];
    currBucket = *prevBucketPtr;

    /*
     * Follow collision chain looking for matching key
     */
    match = hashp->match;       /* save one fetch in inner loop */
    keysize = hashp->keysize;   /* ditto */

    while (currBucket != NULL)
    {
        if (currBucket->hashvalue == newhashvalue &&
            match(ELEMENTKEY(currBucket), newKeyPtr, keysize) == 0)
            break;
        prevBucketPtr = &(currBucket->link);
        currBucket = *prevBucketPtr;
#ifdef HASH_STATISTICS
        hash_collisions++;
        hctl->collisions++;
#endif
    }

    if (currBucket != NULL)
        return false;           /* collision with an existing entry */

    currBucket = existingElement;

    /*
     * If old and new hash values belong to the same bucket, we need not
     * change any chain links, and indeed should not since this simplistic
     * update will corrupt the list if currBucket is the last element. (We
     * cannot fall out earlier, however, since we need to scan the bucket to
     * check for duplicate keys.)
     */
    if (bucket != newbucket)
    {
        /* OK to remove record from old hash bucket's chain. */
        *oldPrevPtr = currBucket->link;

        /* link into new hashbucket chain */
        *prevBucketPtr = currBucket;
        currBucket->link = NULL;
    }

    /* copy new key into record */
    currBucket->hashvalue = newhashvalue;
    hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);

    /* rest of record is untouched */

    return true;
}

/*
 * Allocate a new hashtable entry if possible; return NULL if out of memory.
 * (Or, if the underlying space allocator throws error for out-of-memory,
 * we won't return at all.)
 */
static HASHBUCKET
get_hash_entry(HTAB *hashp, int freelist_idx)
{
    HASHHDR *hctl = hashp->hctl;
    HASHBUCKET newElement;

    for (;;)
    {
        /* if partitioned, must lock to touch nentries and freeList */
        if (IS_PARTITIONED(hctl))
            SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);

        /* try to get an entry from the freelist */
        newElement = hctl->freeList[freelist_idx].freeList;

        if (newElement != NULL)
            break;

        if (IS_PARTITIONED(hctl))
            SpinLockRelease(&hctl->freeList[freelist_idx].mutex);

        /*
         * No free elements in this freelist. In a partitioned table, there
         * might be entries in other freelists, but to reduce contention we
         * prefer to first try to get another chunk of buckets from the main
         * shmem allocator. If that fails, though, we *MUST* root through all
         * the other freelists before giving up. There are multiple callers
         * that assume that they can allocate every element in the initially
         * requested table size, or that deleting an element guarantees they
         * can insert a new element, even if shared memory is entirely full.
         * Failing because the needed element is in a different freelist is
         * not acceptable.
         */
        if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
        {
            int borrow_from_idx;

            if (!IS_PARTITIONED(hctl))
                return NULL;    /* out of memory */

            /* try to borrow element from another freelist */
            borrow_from_idx = freelist_idx;
            for (;;)
            {
                borrow_from_idx = (borrow_from_idx + 1) % NUM_FREELISTS;
                if (borrow_from_idx == freelist_idx)
                    break;      /* examined all freelists, fail */

                SpinLockAcquire(&(hctl->freeList[borrow_from_idx].mutex));
                newElement = hctl->freeList[borrow_from_idx].freeList;

                if (newElement != NULL)
                {
                    hctl->freeList[borrow_from_idx].freeList = newElement->link;
                    SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));

                    /* careful: count the new element in its proper freelist */
                    SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
                    hctl->freeList[freelist_idx].nentries++;
                    SpinLockRelease(&hctl->freeList[freelist_idx].mutex);

                    return newElement;
                }

                SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
            }

            /* no elements available to borrow either, so out of memory */
            return NULL;
        }
    }

    /* remove entry from freelist, bump nentries */
    hctl->freeList[freelist_idx].freeList = newElement->link;
    hctl->freeList[freelist_idx].nentries++;

    if (IS_PARTITIONED(hctl))
        SpinLockRelease(&hctl->freeList[freelist_idx].mutex);

    return newElement;
}

/*
 * hash_get_num_entries -- get the number of entries in a hashtable
 */
long
hash_get_num_entries(HTAB *hashp)
{
    int i;
    long sum = hashp->hctl->freeList[0].nentries;

    /*
     * We currently don't bother with acquiring the mutexes; it's only
     * sensible to call this function if you've got lock on all partitions of
     * the table.
     */
    if (IS_PARTITIONED(hashp->hctl))
    {
        for (i = 1; i < NUM_FREELISTS; i++)
            sum += hashp->hctl->freeList[i].nentries;
    }

    return sum;
}

/*
 * hash_seq_init/_search/_term
 *			Sequentially search through hash table and return
 *			all the elements one by one, return NULL when no more.
 *
 * hash_seq_term should be called if and only if the scan is abandoned before
 * completion; if hash_seq_search returns NULL then it has already done the
 * end-of-scan cleanup.
 *
 * NOTE: caller may delete the returned element before continuing the scan.
 * However, deleting any other element while the scan is in progress is
 * UNDEFINED (it might be the one that curIndex is pointing at!). Also,
 * if elements are added to the table while the scan is in progress, it is
 * unspecified whether they will be visited by the scan or not.
 *
 * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
 * worry about hash_seq_term cleanup, if the hashtable is first locked against
 * further insertions by calling hash_freeze.
 *
 * NOTE: to use this with a partitioned hashtable, caller had better hold
 * at least shared lock on all partitions of the table throughout the scan!
 * We can cope with insertions or deletions by our own backend, but *not*
 * with concurrent insertions or deletions by another.
 */
void
hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
{
    status->hashp = hashp;
    status->curBucket = 0;
    status->curEntry = NULL;
    if (!hashp->frozen)
        register_seq_scan(hashp);
}

void *
hash_seq_search(HASH_SEQ_STATUS *status)
{
    HTAB *hashp;
    HASHHDR *hctl;
    uint32 max_bucket;
    long ssize;
    long segment_num;
    long segment_ndx;
    HASHSEGMENT segp;
    uint32 curBucket;
    HASHELEMENT *curElem;

    if ((curElem = status->curEntry) != NULL)
    {
        /* Continuing scan of curBucket... */
        status->curEntry = curElem->link;
        if (status->curEntry == NULL)   /* end of this bucket */
            ++status->curBucket;
        return (void *) ELEMENTKEY(curElem);
    }

    /*
     * Search for next nonempty bucket starting at curBucket.
     */
    curBucket = status->curBucket;
    hashp = status->hashp;
    hctl = hashp->hctl;
    ssize = hashp->ssize;
    max_bucket = hctl->max_bucket;

    if (curBucket > max_bucket)
    {
        hash_seq_term(status);
        return NULL;            /* search is done */
    }

    /*
     * first find the right segment in the table directory.
     */
    segment_num = curBucket >> hashp->sshift;
    segment_ndx = MOD(curBucket, ssize);

    segp = hashp->dir[segment_num];

    /*
     * Pick up the first item in this bucket's chain. If chain is not empty
     * we can begin searching it. Otherwise we have to advance to find the
     * next nonempty bucket. We try to optimize that case since searching a
     * near-empty hashtable has to iterate this loop a lot.
     */
    while ((curElem = segp[segment_ndx]) == NULL)
    {
        /* empty bucket, advance to next */
        if (++curBucket > max_bucket)
        {
            status->curBucket = curBucket;
            hash_seq_term(status);
            return NULL;        /* search is done */
        }
        if (++segment_ndx >= ssize)
        {
            segment_num++;
            segment_ndx = 0;
            segp = hashp->dir[segment_num];
        }
    }

    /* Begin scan of curBucket... */
    status->curEntry = curElem->link;
    if (status->curEntry == NULL)   /* end of this bucket */
        ++curBucket;
    status->curBucket = curBucket;
    return (void *) ELEMENTKEY(curElem);
}

void
hash_seq_term(HASH_SEQ_STATUS *status)
{
    if (!status->hashp->frozen)
        deregister_seq_scan(status->hashp);
}
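
/*
 * Canonical scan loop (illustrative fragment, not in the original file;
 * "MyCacheEntry" is a hypothetical entry type). Because the loop runs to
 * completion, hash_seq_search does the end-of-scan cleanup itself and
 * hash_seq_term must not be called; call it only when abandoning a scan
 * early.
 */
#ifdef NOT_USED
    HASH_SEQ_STATUS status;
    MyCacheEntry *entry;

    hash_seq_init(&status, hashp);
    while ((entry = (MyCacheEntry *) hash_seq_search(&status)) != NULL)
    {
        /* deleting the just-returned entry is explicitly allowed */
        if (entry->hit_count == 0)
            hash_search(hashp, &entry->key, HASH_REMOVE, NULL);
    }
#endif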

/*
 * hash_freeze
 *			Freeze a hashtable against future insertions (deletions are
 *			still allowed)
 *
 * The reason for doing this is that by preventing any more bucket splits,
 * we no longer need to worry about registering hash_seq_search scans,
 * and thus caller need not be careful about ensuring hash_seq_term gets
 * called at the right times.
 *
 * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
 * with active scans (since hash_seq_term would then do the wrong thing).
 */
void
hash_freeze(HTAB *hashp)
{
    if (hashp->isshared)
        elog(ERROR, "cannot freeze shared hashtable \"%s\"", hashp->tabname);
    if (!hashp->frozen && has_seq_scans(hashp))
        elog(ERROR, "cannot freeze hashtable \"%s\" because it has active scans",
             hashp->tabname);
    hashp->frozen = true;
}


/********************************* UTILITIES ************************/

/*
 * Expand the table by adding one more hash bucket.
 */
static bool
expand_table(HTAB *hashp)
{
    HASHHDR *hctl = hashp->hctl;
    HASHSEGMENT old_seg,
                new_seg;
    long old_bucket,
         new_bucket;
    long new_segnum,
         new_segndx;
    long old_segnum,
         old_segndx;
    HASHBUCKET *oldlink,
               *newlink;
    HASHBUCKET currElement,
               nextElement;

    Assert(!IS_PARTITIONED(hctl));

#ifdef HASH_STATISTICS
    hash_expansions++;
#endif

    new_bucket = hctl->max_bucket + 1;
    new_segnum = new_bucket >> hashp->sshift;
    new_segndx = MOD(new_bucket, hashp->ssize);

    if (new_segnum >= hctl->nsegs)
    {
        /* Allocate new segment if necessary -- could fail if dir full */
        if (new_segnum >= hctl->dsize)
            if (!dir_realloc(hashp))
                return false;
        if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
            return false;
        hctl->nsegs++;
    }

    /* OK, we created a new bucket */
    hctl->max_bucket++;

    /*
     * *Before* changing masks, find old bucket corresponding to same hash
     * values; values in that bucket may need to be relocated to new bucket.
     * Note that new_bucket is certainly larger than low_mask at this point,
     * so we can skip the first step of the regular hash mask calc.
     */
    old_bucket = (new_bucket & hctl->low_mask);

    /*
     * If we crossed a power of 2, readjust masks.
     */
    if ((uint32) new_bucket > hctl->high_mask)
    {
        hctl->low_mask = hctl->high_mask;
        hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
    }

    /*
     * Relocate records to the new bucket. NOTE: because of the way the hash
     * masking is done in calc_bucket, only one old bucket can need to be
     * split at this point. With a different way of reducing the hash value,
     * that might not be true!
     */
    old_segnum = old_bucket >> hashp->sshift;
    old_segndx = MOD(old_bucket, hashp->ssize);

    old_seg = hashp->dir[old_segnum];
    new_seg = hashp->dir[new_segnum];

    oldlink = &old_seg[old_segndx];
    newlink = &new_seg[new_segndx];

    for (currElement = *oldlink;
         currElement != NULL;
         currElement = nextElement)
    {
        nextElement = currElement->link;
        if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
        {
            *oldlink = currElement;
            oldlink = &currElement->link;
        }
        else
        {
            *newlink = currElement;
            newlink = &currElement->link;
        }
    }
    /* don't forget to terminate the rebuilt hash chains... */
    *oldlink = NULL;
    *newlink = NULL;

    return true;
}


static bool
dir_realloc(HTAB *hashp)
{
    HASHSEGMENT *p;
    HASHSEGMENT *old_p;
    long new_dsize;
    long old_dirsize;
    long new_dirsize;

    if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
        return false;

    /* Reallocate directory */
    new_dsize = hashp->hctl->dsize << 1;
    old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
    new_dirsize = new_dsize * sizeof(HASHSEGMENT);

    old_p = hashp->dir;
    CurrentDynaHashCxt = hashp->hcxt;
    p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);

    if (p != NULL)
    {
        memcpy(p, old_p, old_dirsize);
        MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
        hashp->dir = p;
        hashp->hctl->dsize = new_dsize;

        /* XXX assume the allocator is palloc, so we know how to free */
        Assert(hashp->alloc == DynaHashAlloc);
        pfree(old_p);

        return true;
    }

    return false;
}


static HASHSEGMENT
seg_alloc(HTAB *hashp)
{
    HASHSEGMENT segp;

    CurrentDynaHashCxt = hashp->hcxt;
    segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);

    if (!segp)
        return NULL;

    MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);

    return segp;
}
1699 
1700 /*
1701  * allocate some new elements and link them into the indicated free list
1702  */
1703 static bool
1704 element_alloc(HTAB *hashp, int nelem, int freelist_idx)
1705 {
1706  HASHHDR *hctl = hashp->hctl;
1707  Size elementSize;
1708  HASHELEMENT *firstElement;
1709  HASHELEMENT *tmpElement;
1710  HASHELEMENT *prevElement;
1711  int i;
1712 
1713  if (hashp->isfixed)
1714  return false;
1715 
1716  /* Each element has a HASHELEMENT header plus user data. */
1717  elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
1718 
1719  CurrentDynaHashCxt = hashp->hcxt;
1720  firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
1721 
1722  if (!firstElement)
1723  return false;
1724 
1725  /* prepare to link all the new entries into the freelist */
1726  prevElement = NULL;
1727  tmpElement = firstElement;
1728  for (i = 0; i < nelem; i++)
1729  {
1730  tmpElement->link = prevElement;
1731  prevElement = tmpElement;
1732  tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
1733  }
1734 
1735  /* if partitioned, must lock to touch freeList */
1736  if (IS_PARTITIONED(hctl))
1737  SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1738 
1739  /* freelist could be nonempty if two backends did this concurrently */
1740  firstElement->link = hctl->freeList[freelist_idx].freeList;
1741  hctl->freeList[freelist_idx].freeList = prevElement;
1742 
1743  if (IS_PARTITIONED(hctl))
1744  SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1745 
1746  return true;
1747 }
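
The carve-one-allocation-into-a-freelist pattern above is worth seeing in isolation. This is a minimal standalone sketch, not backend code: the hypothetical Cell type stands in for a HASHELEMENT header plus MAXALIGN'd user data, and there is no spinlock because nothing is shared. Note how the loop links cells in reverse, so the last cell carved becomes the head of the LIFO free list.

#include <stdio.h>
#include <stdlib.h>

typedef struct Cell
{
    struct Cell *link;          /* freelist chain, like HASHELEMENT->link */
    int          payload;       /* stand-in for the user entry */
} Cell;

int
main(void)
{
    const int    nelem = 4;
    const size_t cellSize = sizeof(Cell);
    Cell        *block = malloc(nelem * cellSize);
    Cell        *prev = NULL;
    Cell        *tmp = block;

    if (!block)
        return 1;

    /* carve the single allocation into nelem chained cells */
    for (int i = 0; i < nelem; i++)
    {
        tmp->link = prev;
        prev = tmp;
        tmp = (Cell *) ((char *) tmp + cellSize);
    }

    /* prev is the freelist head; offsets print in decreasing order to 0 */
    for (Cell *c = prev; c != NULL; c = c->link)
        printf("cell at offset %ld\n", (long) ((char *) c - (char *) block));

    free(block);
    return 0;
}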
1748 
1749 /* complain when we have detected a corrupted hashtable */
1750 static void
1751 hash_corrupted(HTAB *hashp)
1752 {
1753  /*
1754  * If the corruption is in a shared hashtable, we'd better force a
1755  * systemwide restart. Otherwise, just shut down this one backend.
1756  */
1757  if (hashp->isshared)
1758  elog(PANIC, "hash table \"%s\" corrupted", hashp->tabname);
1759  else
1760  elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname);
1761 }
1762 
1763 /* calculate ceil(log base 2) of num */
1764 int
1765 my_log2(long num)
1766 {
1767  /*
1768  * guard against too-large input, which would be invalid for
1769  * pg_ceil_log2_*()
1770  */
1771  if (num > LONG_MAX / 2)
1772  num = LONG_MAX / 2;
1773 
1774 #if SIZEOF_LONG < 8
1775  return pg_ceil_log2_32(num);
1776 #else
1777  return pg_ceil_log2_64(num);
1778 #endif
1779 }
1780 
1781 /* calculate first power of 2 >= num, bounded to what will fit in a long */
1782 static long
1783 next_pow2_long(long num)
1784 {
1785  /* my_log2's internal range check is sufficient */
1786  return 1L << my_log2(num);
1787 }
1788 
1789 /* calculate first power of 2 >= num, bounded to what will fit in an int */
1790 static int
1791 next_pow2_int(long num)
1792 {
1793  if (num > INT_MAX / 2)
1794  num = INT_MAX / 2;
1795  return 1 << my_log2(num);
1796 }
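
A concrete check on this arithmetic: my_log2 returns ceil(log2(num)), so the next_pow2_* helpers map a power of 2 to itself and round anything else up. The sketch below is standalone and uses a portable loop in place of the pg_ceil_log2_32/64 bit tricks; same results, just slower.

#include <stdio.h>

/* loop-based ceil(log2(num)); stand-in for pg_ceil_log2_* */
static int
sketch_ceil_log2(long num)
{
    int  log = 0;
    long pow = 1;

    while (pow < num)
    {
        pow <<= 1;
        log++;
    }
    return log;
}

int
main(void)
{
    long inputs[] = {1, 2, 3, 1000, 1024, 1025};

    for (int i = 0; i < 6; i++)
        printf("next_pow2(%ld) = %ld\n", inputs[i],
               1L << sketch_ceil_log2(inputs[i]));
    /* prints 1, 2, 4, 1024, 1024, 2048 */
    return 0;
}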
1797 
1798 
1799 /************************* SEQ SCAN TRACKING ************************/
1800 
1801 /*
1802  * We track active hash_seq_search scans here. The need for this mechanism
1803  * comes from the fact that a scan will get confused if a bucket split occurs
1804  * while it's in progress: it might visit entries twice, or even miss some
1805  * entirely (if it's partway through the same bucket that splits). Hence
1806  * we want to inhibit bucket splits if there are any active scans on the
1807  * table being inserted into. This is a fairly rare case in current usage,
1808  * so just postponing the split until the next insertion seems sufficient.
1809  *
1810  * Given present usages of the function, only a few scans are likely to be
1811  * open concurrently; so a finite-size stack of open scans seems sufficient,
1812  * and we don't worry that linear search is too slow. Note that we do
1813  * allow multiple scans of the same hashtable to be open concurrently.
1814  *
1815  * This mechanism can support concurrent scan and insertion in a shared
1816  * hashtable if it's the same backend doing both. It would fail otherwise,
1817  * but locking reasons seem to preclude any such scenario anyway, so we don't
1818  * worry.
1819  *
1820  * This arrangement is reasonably robust if a transient hashtable is deleted
1821  * without notifying us. The absolute worst case is we might inhibit splits
1822  * in another table created later at exactly the same address. We will give
1823  * a warning at transaction end for reference leaks, so any bugs leading to
1824  * lack of notification should be easy to catch.
1825  */
1826 
1827 #define MAX_SEQ_SCANS 100
1828 
1829 static HTAB *seq_scan_tables[MAX_SEQ_SCANS]; /* tables being scanned */
1830 static int seq_scan_level[MAX_SEQ_SCANS]; /* subtransaction nest level */
1831 static int num_seq_scans = 0;
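
To make the bookkeeping concrete, here is a sketch of the scan discipline these arrays support. The dynahash calls are the real API, but SketchEntry and the surrounding function are hypothetical, and a backend environment is assumed. A scan that runs until hash_seq_search() returns NULL is deregistered automatically; a scan abandoned early must call hash_seq_term() itself, or AtEOXact_HashTables() will warn about a leaked scan at commit.

#include "postgres.h"
#include "utils/hsearch.h"

typedef struct SketchEntry
{
    int32       key;            /* hash key; must be first */
    int32       value;
} SketchEntry;

/* Return true if any entry's value equals target, stopping early. */
static bool
sketch_scan_contains(HTAB *htab, int32 target)
{
    HASH_SEQ_STATUS status;
    SketchEntry *entry;

    hash_seq_init(&status, htab);   /* registers the scan (unless frozen) */
    while ((entry = (SketchEntry *) hash_seq_search(&status)) != NULL)
    {
        if (entry->value == target)
        {
            /* stopping before the NULL return: end the scan ourselves */
            hash_seq_term(&status);
            return true;
        }
    }
    /* hash_seq_search returning NULL has already terminated the scan */
    return false;
}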
1832 
1833 
1834 /* Register a table as having an active hash_seq_search scan */
1835 static void
1836 register_seq_scan(HTAB *hashp)
1837 {
1838  if (num_seq_scans >= MAX_SEQ_SCANS)
1839  elog(ERROR, "too many active hash_seq_search scans, cannot start one on \"%s\"",
1840  hashp->tabname);
1841  seq_scan_tables[num_seq_scans] = hashp;
1842  seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
1843  num_seq_scans++;
1844 }
1845 
1846 /* Deregister an active scan */
1847 static void
1848 deregister_seq_scan(HTAB *hashp)
1849 {
1850  int i;
1851 
1852  /* Search backward since it's most likely at the stack top */
1853  for (i = num_seq_scans - 1; i >= 0; i--)
1854  {
1855  if (seq_scan_tables[i] == hashp)
1856  {
1857  seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
1858  seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
1859  num_seq_scans--;
1860  return;
1861  }
1862  }
1863  elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
1864  hashp->tabname);
1865 }
1866 
1867 /* Check if a table has any active scan */
1868 static bool
1869 has_seq_scans(HTAB *hashp)
1870 {
1871  int i;
1872 
1873  for (i = 0; i < num_seq_scans; i++)
1874  {
1875  if (seq_scan_tables[i] == hashp)
1876  return true;
1877  }
1878  return false;
1879 }
1880 
1881 /* Clean up any open scans at end of transaction */
1882 void
1883 AtEOXact_HashTables(bool isCommit)
1884 {
1885  /*
1886  * During abort cleanup, open scans are expected; just silently clean 'em
1887  * out. An open scan at commit means someone forgot a hash_seq_term()
1888  * call, so complain.
1889  *
1890  * Note: it's tempting to try to print the tabname here, but refrain for
1891  * fear of touching deallocated memory. This isn't a user-facing message
1892  * anyway, so it needn't be pretty.
1893  */
1894  if (isCommit)
1895  {
1896  int i;
1897 
1898  for (i = 0; i < num_seq_scans; i++)
1899  {
1900  elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1901  seq_scan_tables[i]);
1902  }
1903  }
1904  num_seq_scans = 0;
1905 }
1906 
1907 /* Clean up any open scans at end of subtransaction */
1908 void
1909 AtEOSubXact_HashTables(bool isCommit, int nestDepth)
1910 {
1911  int i;
1912 
1913  /*
1914  * Search backward to make cleanup easy. Note we must check all entries,
1915  * not only those at the end of the array, because the deletion technique
1916  * doesn't keep them in order.
1917  */
1918  for (i = num_seq_scans - 1; i >= 0; i--)
1919  {
1920  if (seq_scan_level[i] >= nestDepth)
1921  {
1922  if (isCommit)
1923  elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1924  seq_scan_tables[i]);
1925  seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
1926  seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
1927  num_seq_scans--;
1928  }
1929  }
1930 }